gen_autofdo_event.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #!/usr/bin/python
  2. # Generate Intel taken branches Linux perf event script for autofdo profiling.
  3. # Copyright (C) 2016 Free Software Foundation, Inc.
  4. #
  5. # GCC is free software; you can redistribute it and/or modify it under
  6. # the terms of the GNU General Public License as published by the Free
  7. # Software Foundation; either version 3, or (at your option) any later
  8. # version.
  9. #
  10. # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11. # WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. # for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with GCC; see the file COPYING3. If not see
  17. # <http://www.gnu.org/licenses/>.
  18. # Run it with perf record -b -e EVENT program ...
  19. # The Linux kernel needs to support the PMU of the current CPU, and
  20. # it will likely not work in VMs.
  21. # Add --all to print for all cpus, otherwise for current cpu.
  22. # Add --script to generate shell script to run correct event.
  23. #
  24. # Requires internet (https) access. This may require setting up a proxy
  25. # with export https_proxy=...
  26. #
import argparse
import collections
import json
import os
import sys
import urllib2
  32. baseurl = "https://download.01.org/perfmon"
  33. target_events = (u'BR_INST_RETIRED.NEAR_TAKEN',
  34. u'BR_INST_EXEC.TAKEN',
  35. u'BR_INST_RETIRED.TAKEN_JCC',
  36. u'BR_INST_TYPE_RETIRED.COND_TAKEN')
  37. ap = argparse.ArgumentParser()
  38. ap.add_argument('--all', '-a', help='Print for all CPUs', action='store_true')
  39. ap.add_argument('--script', help='Generate shell script', action='store_true')
  40. args = ap.parse_args()
  41. eventmap = collections.defaultdict(list)
  42. def get_cpustr():
  43. cpuinfo = os.getenv("CPUINFO")
  44. if cpuinfo is None:
  45. cpuinfo = '/proc/cpuinfo'
  46. f = open(cpuinfo, 'r')
  47. cpu = [None, None, None, None]
  48. for j in f:
  49. n = j.split()
  50. if n[0] == 'vendor_id':
  51. cpu[0] = n[2]
  52. elif n[0] == 'model' and n[1] == ':':
  53. cpu[2] = int(n[2])
  54. elif n[0] == 'cpu' and n[1] == 'family':
  55. cpu[1] = int(n[3])
  56. elif n[0] == 'stepping' and n[1] == ':':
  57. cpu[3] = int(n[2])
  58. if all(v is not None for v in cpu):
  59. break
  60. # stepping for SKX only
  61. stepping = cpu[0] == "GenuineIntel" and cpu[1] == 6 and cpu[2] == 0x55
  62. if stepping:
  63. return "%s-%d-%X-%X" % tuple(cpu)
  64. return "%s-%d-%X" % tuple(cpu)[:3]
  65. def find_event(eventurl, model):
  66. print >>sys.stderr, "Downloading", eventurl
  67. u = urllib2.urlopen(eventurl)
  68. events = json.loads(u.read())
  69. u.close()
  70. found = 0
  71. for j in events:
  72. if j[u'EventName'] in target_events:
  73. event = "cpu/event=%s,umask=%s/" % (j[u'EventCode'], j[u'UMask'])
  74. if u'PEBS' in j and j[u'PEBS'] > 0:
  75. event += "p"
  76. if args.script:
  77. eventmap[event].append(model)
  78. else:
  79. print j[u'EventName'], "event for model", model, "is", event
  80. found += 1
  81. return found
  82. if not args.all:
  83. cpu = get_cpu_str()
  84. if not cpu:
  85. sys.exit("Unknown CPU type")
  86. url = baseurl + "/mapfile.csv"
  87. print >>sys.stderr, "Downloading", url
  88. u = urllib2.urlopen(url)
  89. found = 0
  90. cpufound = 0
  91. for j in u:
  92. n = j.rstrip().split(',')
  93. if len(n) >= 4 and (args.all or n[0] == cpu) and n[3] == "core":
  94. if args.all:
  95. components = n[0].split("-")
  96. model = components[2]
  97. model = int(model, 16)
  98. cpufound += 1
  99. found += find_event(baseurl + n[2], model)
  100. u.close()
  101. if args.script:
  102. print '''#!/bin/sh
  103. # Profile workload for gcc profile feedback (autofdo) using Linux perf.
  104. # Auto generated. To regenerate for new CPUs run
  105. # contrib/gen_autofdo_event.py --script --all in gcc source
  106. # usages:
  107. # gcc-auto-profile program (profile program and children)
  108. # gcc-auto-profile -a sleep X (profile all for X secs, may need root)
  109. # gcc-auto-profile -p PID sleep X (profile PID)
  110. # gcc-auto-profile --kernel -a sleep X (profile kernel)
  111. # gcc-auto-profile --all -a sleep X (profile kernel and user space)
  112. # Identify branches taken event for CPU.
  113. #
  114. FLAGS=u
  115. if [ "$1" = "--kernel" ] ; then
  116. FLAGS=k
  117. shift
  118. fi
  119. if [ "$1" = "--all" ] ; then
  120. FLAGS=uk
  121. shift
  122. fi
  123. if ! grep -q Intel /proc/cpuinfo ; then
  124. echo >&2 "Only Intel CPUs supported"
  125. exit 1
  126. fi
  127. if grep -q hypervisor /proc/cpuinfo ; then
  128. echo >&2 "Warning: branch profiling may not be functional in VMs"
  129. fi
  130. case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo &&
  131. egrep "^model\s*:" /proc/cpuinfo | head -n1` in'''
  132. for event, mod in eventmap.iteritems():
  133. for m in mod[:-1]:
  134. print "model*:\ %s|\\" % m
  135. print 'model*:\ %s) E="%s$FLAGS" ;;' % (mod[-1], event)
  136. print '''*)
  137. echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
  138. exit 1 ;;'''
  139. print "esac"
  140. print "set -x"
  141. print 'if ! perf record -e $E -b "$@" ; then'
  142. print ' # PEBS may not actually be working even if the processor supports it'
  143. print ' # (e.g., in a virtual machine). Trying to run without /p.'
  144. print ' set +x'
  145. print ' echo >&2 "Retrying without /p."'
  146. print ' E="$(echo "${E}" | sed -e \'s/\/p/\//\')"'
  147. print ' set -x'
  148. print ' exec perf record -e $E -b "$@"'
  149. print ' set +x'
  150. print 'fi'
  151. if cpufound == 0 and not args.all:
  152. sys.exit('CPU %s not found' % cpu)
  153. if found == 0:
  154. sys.exit('Branch event not found')