update-copyright.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. #!/usr/bin/python
  2. #
  3. # Copyright (C) 2013-2022 Free Software Foundation, Inc.
  4. #
  5. # This script is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 3, or (at your option)
  8. # any later version.
  9. # This script adjusts the copyright notices at the top of source files
  10. # so that they have the form:
  11. #
  12. # Copyright XXXX-YYYY Free Software Foundation, Inc.
  13. #
  14. # It doesn't change code that is known to be maintained elsewhere or
  15. # that carries a non-FSF copyright.
  16. #
  17. # Pass --this-year to the script if you want it to add the current year
  18. # to all applicable notices. Pass --quilt if you are using quilt and
  19. # want files to be added to the quilt before being changed.
  20. #
  21. # By default the script will update all directories for which the
  22. # output has been vetted. You can instead pass the names of individual
  23. # directories, including those that haven't been approved. So:
  24. #
  25. # update-copyright.py --this-year
  26. #
  27. # is the command that would be used at the beginning of a year to update
  28. # all copyright notices (and possibly at other times to check whether
  29. # new files have been added with old years). On the other hand:
  30. #
  31. # update-copyright.py --this-year libjava
  32. #
  33. # would run the script on just libjava/.
  34. #
  35. # This script was copied from gcc's contrib/ and modified to suit
  36. # binutils. In contrast to the gcc script, this one will update
  37. # the testsuite and --version output strings too.
  38. import os
  39. import re
  40. import sys
  41. import time
  42. import subprocess
  class Errors:
      """Report error messages on stderr and count how many were reported."""

      def __init__ (self):
          # Number of calls made to report() so far.
          self.num_errors = 0

      def report (self, filename, string):
          """Write STRING to stderr and bump the error count.

          If FILENAME is non-empty the message is prefixed by 'FILENAME: '.
          """
          if filename:
              string = filename + ': ' + string
          sys.stderr.write (string + '\n')
          self.num_errors += 1

      def ok (self):
          """Return True if report() has never been called."""
          return self.num_errors == 0
  class GenericFilter:
      """Default policy for which files and directories get processed.

      The sets created in __init__ can be extended by subclasses, and the
      predicate methods can be overridden.
      """

      def __init__ (self):
          self.skip_files = set()
          self.skip_dirs = set()
          self.skip_extensions = set()
          self.fossilised_files = set()
          self.own_files = set()

          self.skip_files |= set ([
                  # Skip licence files.
                  'COPYING',
                  'COPYING.LIB',
                  'COPYING3',
                  'COPYING3.LIB',
                  'COPYING.LIBGLOSS',
                  'COPYING.NEWLIB',
                  'LICENSE',
                  'fdl.texi',
                  'gpl_v3.texi',
                  'fdl-1.3.xml',
                  'gpl-3.0.xml',

                  # Skip auto- and libtool-related files
                  'aclocal.m4',
                  'compile',
                  'config.guess',
                  'config.sub',
                  'depcomp',
                  'install-sh',
                  'libtool.m4',
                  'ltmain.sh',
                  'ltoptions.m4',
                  'ltsugar.m4',
                  'ltversion.m4',
                  'lt~obsolete.m4',
                  'missing',
                  'mkdep',
                  'mkinstalldirs',
                  'move-if-change',
                  'shlibpath.m4',
                  'symlink-tree',
                  'ylwrap',

                  # Skip FSF mission statement, etc.
                  'gnu.texi',
                  'funding.texi',
                  'appendix_free.xml',

                  # Skip imported texinfo files.
                  'texinfo.tex',
                  ])

          self.skip_extensions |= set ([
                  # Maintained by the translation project.
                  '.po',

                  # Automatically-generated.
                  '.pot',
                  ])

          self.skip_dirs |= set ([
                  'autom4te.cache',
                  ])

      def get_line_filter (self, dir, filename):
          """Return a regexp matching lines to leave alone, or None."""
          if filename.startswith ('ChangeLog'):
              # Ignore references to copyright in changelog entries.
              return re.compile ('\t')

          return None

      def skip_file (self, dir, filename):
          """Return True if FILENAME in directory DIR should not be processed."""
          if filename in self.skip_files:
              return True

          (base, extension) = os.path.splitext (os.path.join (dir, filename))
          if extension in self.skip_extensions:
              return True

          if extension == '.in':
              # Skip .in files produced by automake.
              if os.path.exists (base + '.am'):
                  return True

              # Skip files produced by autogen
              if (os.path.exists (base + '.def')
                  and os.path.exists (base + '.tpl')):
                  return True

          # Skip configure files produced by autoconf
          if filename == 'configure':
              if os.path.exists (base + '.ac'):
                  return True
              if os.path.exists (base + '.in'):
                  return True

          return False

      def skip_dir (self, dir, subdir):
          """Return True if subdirectory SUBDIR of DIR should not be walked."""
          return subdir in self.skip_dirs

      def is_fossilised_file (self, dir, filename):
          """Return True if FILENAME should not have its end year advanced."""
          if filename in self.fossilised_files:
              return True
          # Only touch current ChangeLogs.
          if filename != 'ChangeLog' and 'ChangeLog' in filename:
              return True
          return False

      def by_package_author (self, dir, filename):
          """Return True if FILENAME is listed in own_files."""
          return filename in self.own_files
  class Copyright:
      """Find copyright notices in files and rewrite them in canonical form.

      ERRORS is an object providing report (filename, string) and ok ().
      Recognised copyright holders are registered with add_package_author
      and add_external_author.
      """

      def __init__ (self, errors):
          self.errors = errors

          # Characters in a range of years.  Include '.' for typos.
          ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

          # Non-whitespace characters in a copyright holder's name.
          name = r'[\w.,-]'

          # Matches one year.
          self.year_re = re.compile ('[0-9]+')

          # Matches part of a year or copyright holder.
          self.continuation_re = re.compile (ranges + '|' + name)

          # Matches a full copyright notice:
          self.copyright_re = re.compile (
              # 1: 'Copyright (C)', etc.  The HTML entity spelling of the
              # copyright sign is accepted as well as the literal sign.
              r'([Cc]opyright'
              r'|[Cc]opyright\s+\([Cc]\)'
              r'|[Cc]opyright\s+%s'
              r'|[Cc]opyright\s+&copy;'
              r'|[Cc]opyright\s+©'
              r'|[Cc]opyright\s+@copyright{}'
              r'|@set\s+copyright[\w-]+)'

              # 2: the years.  Include the whitespace in the year, so that
              # we can remove any excess.
              r'(\s*(?:' + ranges + r',?'
              r'|@value\{[^{}]*\})\s*)'

              # 3: 'by ', if used
              r'(by\s+)?'

              # 4: the copyright holder.  Don't allow multiple consecutive
              # spaces, so that right-margin gloss doesn't get caught
              # (e.g. gnat_ugn.texi).
              r'(' + name + r'(?:\s?' + name + r')*)?')

          # A regexp for notices that might have slipped by.  Just matching
          # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
          # HTML header markers, so check for 'copyright' and two digits.
          self.other_copyright_re = re.compile (
              r'(^|[^\._])copyright[^=]*[0-9][0-9]', re.IGNORECASE)
          self.comment_re = re.compile ('#+|[*]+|;+|%+|//+|@c |dnl ')
          self.holders = { '@copying': '@copying' }
          self.holder_prefixes = set()

          # True to 'quilt add' files before changing them.
          self.use_quilt = False

          # If set, force all notices to include this year.
          self.max_year = None

          # Goes after the year(s).  Could be ', '.
          self.separator = ' '

      def add_package_author (self, holder, canon_form = None):
          """Register HOLDER as a spelling of the package author.

          Notices naming HOLDER are rewritten to use CANON_FORM, which
          defaults to HOLDER itself.  Every space-delimited prefix of
          HOLDER is recorded so that a notice whose holder is split across
          lines can be recognised as incomplete.
          """
          if not canon_form:
              canon_form = holder
          self.holders[holder] = canon_form
          index = holder.find (' ')
          while index >= 0:
              self.holder_prefixes.add (holder[:index])
              index = holder.find (' ', index + 1)

      def add_external_author (self, holder):
          """Register HOLDER as a known holder whose notices are left alone."""
          self.holders[holder] = None

      # Derive from Exception: Python 3 refuses to raise or catch classes
      # that do not inherit from BaseException.
      class BadYear (Exception):
          """Raised by parse_year for a year that is not 2 or 4 digits."""

          def __init__ (self, year):
              Exception.__init__ (self, year)
              self.year = year

          def __str__ (self):
              return 'unrecognised year: ' + self.year

      def parse_year (self, string):
          """Convert the digit string STRING to a full year.

          Two-digit years above 70 are taken to be 19xx.  Anything else
          that is not exactly four digits raises BadYear.
          """
          year = int (string)
          if len (string) == 2:
              if year > 70:
                  return year + 1900
          elif len (string) == 4:
              return year
          raise self.BadYear (string)

      def year_range (self, years):
          """Return (min, max) over every year mentioned in YEARS."""
          year_list = [self.parse_year (year)
                       for year in self.year_re.findall (years)]
          assert len (year_list) > 0
          return (min (year_list), max (year_list))

      def set_use_quilt (self, use_quilt):
          self.use_quilt = use_quilt

      def include_year (self, year):
          """Force updated notices to extend to YEAR.  May only be set once."""
          assert not self.max_year
          self.max_year = year

      def canonicalise_years (self, dir, filename, filter, years):
          """Return YEARS rewritten as 'YYYY' or 'YYYY-YYYY'."""
          # Leave texinfo variables alone.
          if years.startswith ('@value'):
              return years

          (min_year, max_year) = self.year_range (years)

          # Update the upper bound, if enabled.
          if self.max_year and not filter.is_fossilised_file (dir, filename):
              max_year = max (max_year, self.max_year)

          # Use a range.
          if min_year == max_year:
              return '%d' % min_year
          else:
              return '%d-%d' % (min_year, max_year)

      def strip_continuation (self, line):
          """Return LINE without leading whitespace and comment marker."""
          line = line.lstrip()
          match = self.comment_re.match (line)
          if match:
              line = line[match.end():].lstrip()
          return line

      def is_complete (self, match):
          """Return a true value if MATCH appears to hold a whole notice.

          A notice is incomplete if it has no holder, or if its holder is
          only a prefix of a registered package author.
          """
          holder = match.group (4)
          return (holder
                  and (holder not in self.holder_prefixes
                       or holder in self.holders))

      def update_copyright (self, dir, filename, filter, file, line, match):
          """Canonicalise the notice that MATCH found in LINE.

          FILE is the iterator LINE was read from; further lines are taken
          from it while the notice looks incomplete.  Returns a tuple
          (changed, line, next_line): CHANGED says whether the text differs
          from what was read, LINE is the text to emit, and NEXT_LINE is a
          line that was read ahead but not consumed (or None).
          """
          orig_line = line
          next_line = None
          pathname = os.path.join (dir, filename)

          intro = match.group (1)
          if intro.startswith ('@set'):
              # Texinfo year variables should always be on one line
              after_years = line[match.end (2):].strip()
              if after_years != '':
                  self.errors.report (pathname,
                                      'trailing characters in @set: '
                                      + after_years)
                  return (False, orig_line, next_line)
          else:
              # If it looks like the copyright is incomplete, add the next line.
              while not self.is_complete (match):
                  try:
                      # The next() builtin works on both Python 2.6+ and
                      # Python 3; file.next() exists only on Python 2.
                      next_line = next (file)
                  except StopIteration:
                      break

                  # If the next line doesn't look like a proper continuation,
                  # assume that what we've got is complete.
                  continuation = self.strip_continuation (next_line)
                  if not self.continuation_re.match (continuation):
                      break

                  # Merge the lines for matching purposes.
                  orig_line += next_line
                  line = line.rstrip() + ' ' + continuation
                  next_line = None

                  # Rematch with the longer line, at the original position.
                  match = self.copyright_re.match (line, match.start())
                  assert match

              holder = match.group (4)

              # Use the filter to test cases where markup is getting in the way.
              if filter.by_package_author (dir, filename):
                  assert holder not in self.holders

              elif not holder:
                  self.errors.report (pathname, 'missing copyright holder')
                  return (False, orig_line, next_line)

              elif holder not in self.holders:
                  self.errors.report (pathname,
                                      'unrecognised copyright holder: ' + holder)
                  return (False, orig_line, next_line)

              else:
                  # See whether the copyright is associated with the package
                  # author.
                  canon_form = self.holders[holder]
                  if not canon_form:
                      return (False, orig_line, next_line)

                  # Make sure the author is given in a consistent way.
                  line = (line[:match.start (4)]
                          + canon_form
                          + line[match.end (4):])

                  # Remove any 'by'
                  line = line[:match.start (3)] + line[match.end (3):]

          # Update the copyright years.
          years = match.group (2).strip()
          if (self.max_year
              and match.start (0) > 0 and line[match.start (0) - 1] == '"'
              and not filter.is_fossilised_file (dir, filename)):
              # A printed copyright date consists of the current year
              canon_form = '%d' % self.max_year
          else:
              try:
                  canon_form = self.canonicalise_years (dir, filename, filter,
                                                        years)
              except self.BadYear as e:
                  self.errors.report (pathname, str (e))
                  return (False, orig_line, next_line)

          line = (line[:match.start (2)]
                  + ' ' + canon_form + self.separator
                  + line[match.end (2):])

          # Use the standard (C) form.
          if intro.endswith ('right'):
              intro += ' (C)'
          elif intro.endswith ('(c)'):
              intro = intro[:-3] + '(C)'
          line = line[:match.start (1)] + intro + line[match.end (1):]

          # Strip trailing whitespace
          line = line.rstrip() + '\n'

          return (line != orig_line, line, next_line)

      def process_file (self, dir, filename, filter):
          """Update the copyright notices in FILENAME, found in directory DIR.

          Files ending in '.tmp' are assumed to be leftovers of an earlier
          run and are removed.  Otherwise the file is rewritten, via a
          '.tmp' sibling, only if a notice changed and no error has been
          reported so far.
          """
          pathname = os.path.join (dir, filename)
          if filename.endswith ('.tmp'):
              # Looks like something we tried to create before.
              try:
                  os.remove (pathname)
              except OSError:
                  pass
              return

          lines = []
          changed = False
          line_filter = filter.get_line_filter (dir, filename)
          with open (pathname, 'r') as file:
              for line in file:
                  # update_copyright may read ahead and hand back a line it
                  # did not consume; keep going until nothing is pending.
                  while line:
                      next_line = None
                      # Leave filtered-out lines alone.
                      if not (line_filter and line_filter.match (line)):
                          match = self.copyright_re.search (line)
                          if match:
                              res = self.update_copyright (dir, filename, filter,
                                                           file, line, match)
                              (this_changed, line, next_line) = res
                              changed = changed or this_changed

                          # Check for copyright lines that might have slipped by.
                          elif self.other_copyright_re.search (line):
                              self.errors.report (pathname,
                                                  'unrecognised copyright: %s'
                                                  % line.strip())
                      lines.append (line)
                      line = next_line

          # If something changed, write the new file out.
          if changed and self.errors.ok():
              tmp_pathname = pathname + '.tmp'
              with open (tmp_pathname, 'w') as file:
                  for line in lines:
                      file.write (line)
              if self.use_quilt:
                  subprocess.call (['quilt', 'add', pathname])
              os.rename (tmp_pathname, pathname)

      def process_tree (self, tree, filter):
          """Process every file under TREE that FILTER does not skip."""
          for (dir, subdirs, filenames) in os.walk (tree):
              # Don't recurse through directories that should be skipped.
              # The list must be modified in place for os.walk to notice.
              subdirs[:] = [subdir for subdir in subdirs
                            if not filter.skip_dir (dir, subdir)]

              # Handle the files in this directory.
              for filename in filenames:
                  if filter.skip_file (dir, filename):
                      sys.stdout.write ('Skipping %s\n'
                                        % os.path.join (dir, filename))
                  else:
                      self.process_file (dir, filename, filter)
  class CmdLine:
      """Command-line driver: parse sys.argv and process the chosen trees.

      COPYRIGHT is a class (or other callable) that is called with the
      Errors object to create the notice updater.
      """

      def __init__ (self, copyright = Copyright):
          self.errors = Errors()
          self.copyright = copyright (self.errors)
          # (directory, filter) pairs registered through add_dir.
          self.dirs = []
          # Directories to process when none are named on the command line.
          self.default_dirs = []
          self.chosen_dirs = []
          self.option_handlers = dict()
          self.option_help = []

          self.add_option ('--help', 'Print this help', self.o_help)
          self.add_option ('--quilt', '"quilt add" files before changing them',
                           self.o_quilt)
          self.add_option ('--this-year', 'Add the current year to every notice',
                           self.o_this_year)

      def add_option (self, name, help, handler):
          """Register option NAME with help text HELP.

          HANDLER is called with the option string when NAME is seen.
          """
          self.option_help.append ((name, help))
          self.option_handlers[name] = handler

      def add_dir (self, dir, filter = None):
          """Register directory DIR, to be processed using FILTER.

          A fresh GenericFilter is used if FILTER is not given, so that
          directories never share one default filter instance.
          """
          if filter is None:
              filter = GenericFilter()
          self.dirs.append ((dir, filter))

      def o_help (self, option = None):
          """Print the usage message and exit with status 0."""
          sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                            'Options:\n' % sys.argv[0])
          format = '%-15s %s\n'
          for (what, help) in self.option_help:
              sys.stdout.write (format % (what, help))
          sys.stdout.write ('\nDirectories:\n')

          # List the registered directories three to a row.
          format = '%-25s'
          for (i, (dir, _filter)) in enumerate (self.dirs, 1):
              if i % 3 == 0 or i == len (self.dirs):
                  sys.stdout.write (dir + '\n')
              else:
                  sys.stdout.write (format % dir)
          sys.exit (0)

      def o_quilt (self, option):
          self.copyright.set_use_quilt (True)

      def o_this_year (self, option):
          self.copyright.include_year (time.localtime().tm_year)

      def main (self):
          """Process sys.argv, update the chosen trees and exit.

          The exit status is 0 if no error was reported and 1 otherwise.
          """
          for arg in sys.argv[1:]:
              if arg[:1] != '-':
                  self.chosen_dirs.append (arg)
              elif arg in self.option_handlers:
                  self.option_handlers[arg] (arg)
              else:
                  self.errors.report (None, 'unrecognised option: ' + arg)
          if self.errors.ok():
              if not self.chosen_dirs:
                  self.chosen_dirs = self.default_dirs
              if not self.chosen_dirs:
                  self.o_help()
              else:
                  for chosen_dir in self.chosen_dirs:
                      # Add a trailing separator so that only the directory
                      # itself and its subdirectories match, not siblings
                      # that merely share a name prefix.
                      canon_dir = os.path.join (chosen_dir, '')
                      count = 0
                      for (dir, filter) in self.dirs:
                          if (dir + os.sep).startswith (canon_dir):
                              count += 1
                              self.copyright.process_tree (dir, filter)
                      if count == 0:
                          self.errors.report (None, 'unrecognised directory: '
                                              + chosen_dir)
          sys.exit (0 if self.errors.ok() else 1)
  445. #----------------------------------------------------------------------------
  class TopLevelFilter (GenericFilter):
      """Filter that skips every subdirectory of the directory it is used on."""

      def skip_dir (self, dir, subdir):
          return True
  class ConfigFilter (GenericFilter):
      """Filter that additionally skips .m4 files imported from gettext."""

      def skip_file (self, dir, filename):
          if filename.endswith ('.m4'):
              pathname = os.path.join (dir, filename)
              with open (pathname) as file:
                  # Skip files imported from gettext.
                  if 'gettext-' in file.readline():
                      return True
          return GenericFilter.skip_file (self, dir, filename)
  class LdFilter (GenericFilter):
      """Filter that additionally skips '.ro' files."""

      def __init__ (self):
          GenericFilter.__init__ (self)

          self.skip_extensions |= set ([
                  # ld testsuite output match files.
                  '.ro',
                  ])
  class BinutilsCopyright (Copyright):
      """Copyright updater that knows the holders registered below."""

      def __init__ (self, errors):
          Copyright.__init__ (self, errors)

          # Every spelling below is rewritten to the canonical FSF form.
          canon_fsf = 'Free Software Foundation, Inc.'
          for spelling in (
                  'Free Software Foundation',
                  'Free Software Foundation.',
                  'Free Software Foundation Inc.',
                  'Free Software Foundation, Inc',
                  'Free Software Foundation, Inc.',
                  'The Free Software Foundation',
                  'The Free Software Foundation, Inc.',
                  'Software Foundation, Inc.',
                  ):
              self.add_package_author (spelling, canon_fsf)

          # Notices naming these holders are recognised but left unchanged.
          for holder in (
                  'Carnegie Mellon University',
                  'John D. Polstra.',
                  'Linaro Ltd.',
                  'MIPS Computer Systems, Inc.',
                  'Red Hat Inc.',
                  'Regents of the University of California.',
                  'The Regents of the University of California.',
                  'Third Eye Software, Inc.',
                  'Ulrich Drepper',
                  'Synopsys Inc.',
                  ):
              self.add_external_author (holder)
  class BinutilsCmdLine (CmdLine):
      """Command line that registers the directories and defaults below."""

      def __init__ (self):
          CmdLine.__init__ (self, BinutilsCopyright)

          # Every directory that may be named on the command line.
          self.add_dir ('.', TopLevelFilter())
          self.add_dir ('bfd')
          self.add_dir ('binutils')
          self.add_dir ('config', ConfigFilter())
          self.add_dir ('cpu')
          self.add_dir ('elfcpp')
          self.add_dir ('etc')
          self.add_dir ('gas')
          self.add_dir ('gdb')
          self.add_dir ('gold')
          self.add_dir ('gprof')
          self.add_dir ('include')
          self.add_dir ('ld', LdFilter())
          self.add_dir ('libctf')
          self.add_dir ('libdecnumber')
          self.add_dir ('libiberty')
          self.add_dir ('opcodes')
          self.add_dir ('readline')
          self.add_dir ('sim')

          # Directories processed when none are given on the command line.
          self.default_dirs = [
              'bfd',
              'binutils',
              'elfcpp',
              'etc',
              'gas',
              'gold',
              'gprof',
              'include',
              'ld',
              'libctf',
              'libiberty',
              'opcodes',
              ]
  # Only run when executed as a script; main() ends by calling sys.exit,
  # so importing this file as a module must not trigger it.
  if __name__ == '__main__':
      BinutilsCmdLine().main()