123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- #!/usr/bin/python
- # Generate Intel taken branches Linux perf event script for autofdo profiling.
- # Copyright (C) 2016 Free Software Foundation, Inc.
- #
- # GCC is free software; you can redistribute it and/or modify it under
- # the terms of the GNU General Public License as published by the Free
- # Software Foundation; either version 3, or (at your option) any later
- # version.
- #
- # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- # WARRANTY; without even the implied warranty of MERCHANTABILITY or
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- # for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with GCC; see the file COPYING3. If not see
- # <http://www.gnu.org/licenses/>.
- # Run it with perf record -b -e EVENT program ...
- # The Linux kernel needs to support the PMU of the current CPU, and
- # it will likely not work in VMs.
- # Add --all to print for all cpus, otherwise for current cpu.
- # Add --script to generate shell script to run correct event.
- #
- # Requires internet (https) access. This may require setting up a proxy
- # with export https_proxy=...
- #
- import urllib2
- import sys
- import json
- import argparse
- import collections
# Root URL; the mapfile and the per-CPU event lists are fetched relative to it.
baseurl = "https://download.01.org/perfmon"

# Event names that are looked for in every downloaded event list.
target_events = (
    u'BR_INST_RETIRED.NEAR_TAKEN',
    u'BR_INST_EXEC.TAKEN',
    u'BR_INST_RETIRED.TAKEN_JCC',
    u'BR_INST_TYPE_RETIRED.COND_TAKEN',
)
# Command line handling.  The arguments are parsed right here, at module
# level, and the resulting namespace is consulted by the code further down.
ap = argparse.ArgumentParser()
ap.add_argument('--all', '-a', action='store_true', help='Print for all CPUs')
ap.add_argument('--script', action='store_true', help='Generate shell script')
args = ap.parse_args()

# Perf event string -> list of CPU models using it.  Filled by find_event()
# in --script mode and read back when the shell script is printed.
eventmap = collections.defaultdict(list)
def get_cpustr():
    """Return the identifier string of the first CPU listed in cpuinfo.

    The file named by the CPUINFO environment variable is read, falling
    back to /proc/cpuinfo when the variable is unset.  The result has the
    form "VENDOR-FAMILY-MODEL" (family in decimal, model in upper-case
    hex).  For GenuineIntel family 6 model 0x55 the stepping is appended
    as a fourth hex field.

    Returns None when the vendor, family or model (or the stepping, where
    it is required) cannot be determined, so that the caller can report an
    unknown CPU instead of failing inside the string formatting.
    """
    # Imported locally: the module's import block does not provide os.
    import os

    cpuinfo = os.getenv("CPUINFO")
    if cpuinfo is None:
        cpuinfo = '/proc/cpuinfo'

    # Index of each interesting cpuinfo field in the cpu list below.
    slots = {'vendor_id': 0, 'cpu family': 1, 'model': 2, 'stepping': 3}
    cpu = [None, None, None, None]

    with open(cpuinfo, 'r') as f:
        for line in f:
            key, sep, value = line.partition(':')
            tokens = value.split()
            # Blank lines, lines without a colon and empty values carry
            # nothing usable; skip them rather than index into them.
            if not sep or not tokens:
                continue
            # Collapse the tab/space padding so that "cpu family" matches.
            slot = slots.get(' '.join(key.split()))
            if slot is None:
                continue
            if slot == 0:
                cpu[0] = tokens[0]
            else:
                try:
                    cpu[slot] = int(tokens[0])
                except ValueError:
                    continue
            # Only the first CPU is of interest; stop once it is complete.
            if all(v is not None for v in cpu):
                break

    if cpu[0] is None or cpu[1] is None or cpu[2] is None:
        return None

    # stepping for SKX only
    stepping = cpu[0] == "GenuineIntel" and cpu[1] == 6 and cpu[2] == 0x55
    if stepping:
        if cpu[3] is None:
            return None
        return "%s-%d-%X-%X" % tuple(cpu)
    return "%s-%d-%X" % tuple(cpu[:3])
def find_event(eventurl, model):
    """Download one event list and report the target events found in it.

    eventurl -- URL of a JSON event list.
    model -- CPU model the list belongs to; appended to eventmap in
             --script mode, otherwise only printed.

    Every entry whose EventName is in target_events is turned into a
    "cpu/event=CODE,umask=MASK/" string, with "p" appended when the entry
    carries a positive PEBS value.

    Returns the number of matching events.
    """
    # write() is used instead of the print statement so that this function
    # is valid syntax under both Python 2 and Python 3.
    sys.stderr.write("Downloading %s\n" % eventurl)
    u = urllib2.urlopen(eventurl)
    try:
        events = json.loads(u.read())
    finally:
        # Release the connection even when the read or the parse fails.
        u.close()

    found = 0
    for j in events:
        if j[u'EventName'] not in target_events:
            continue
        event = "cpu/event=%s,umask=%s/" % (j[u'EventCode'], j[u'UMask'])
        # PEBS may arrive as a string such as "0".  Under Python 2 every
        # string compares greater than the integer 0, so comparing the raw
        # value would mark every event as precise; convert it first.
        try:
            pebs = int(j.get(u'PEBS', 0))
        except (TypeError, ValueError):
            pebs = 0
        if pebs > 0:
            event += "p"
        if args.script:
            eventmap[event].append(model)
        else:
            sys.stdout.write("%s event for model %s is %s\n"
                             % (j[u'EventName'], model, event))
        found += 1
    return found
# Without --all only the machine running this script matters, so work out
# its identifier first.  The helper is defined as get_cpustr; calling it
# under any other spelling raises NameError.
if not args.all:
    cpu = get_cpustr()
    if not cpu:
        sys.exit("Unknown CPU type")
# Fetch the mapfile and hand every matching "core" event list to
# find_event().  Rows are CSV; column 0 is the CPU identifier, column 2 the
# path of the event list relative to baseurl, column 3 the event type.
url = baseurl + "/mapfile.csv"
sys.stderr.write("Downloading %s\n" % url)
u = urllib2.urlopen(url)
found = 0
cpufound = 0
try:
    for line in u:
        fields = line.rstrip().split(',')
        if len(fields) < 4 or fields[3] != "core":
            continue
        # cpu is only defined (and only consulted) when --all is absent.
        if not (args.all or fields[0] == cpu):
            continue
        # The model is the third dash-separated part of the identifier, in
        # hex.  It has to be derived for every accepted row: find_event()
        # needs it with and without --all.
        components = fields[0].split("-")
        model = int(components[2], 16)
        cpufound += 1
        found += find_event(baseurl + fields[2], model)
finally:
    # Release the connection even when a row or a download fails.
    u.close()
# In --script mode print a shell script that selects the perf event for the
# CPU it runs on and then invokes perf record with it.
if args.script:
    emit = sys.stdout.write

    # Fixed preamble, up to the opening of the case statement on the model.
    emit('''#!/bin/sh
# Profile workload for gcc profile feedback (autofdo) using Linux perf.
# Auto generated. To regenerate for new CPUs run
# contrib/gen_autofdo_event.py --script --all in gcc source
# usages:
# gcc-auto-profile program (profile program and children)
# gcc-auto-profile -a sleep X (profile all for X secs, may need root)
# gcc-auto-profile -p PID sleep X (profile PID)
# gcc-auto-profile --kernel -a sleep X (profile kernel)
# gcc-auto-profile --all -a sleep X (profile kernel and user space)
# Identify branches taken event for CPU.
#
FLAGS=u
if [ "$1" = "--kernel" ] ; then
FLAGS=k
shift
fi
if [ "$1" = "--all" ] ; then
FLAGS=uk
shift
fi
if ! grep -q Intel /proc/cpuinfo ; then
echo >&2 "Only Intel CPUs supported"
exit 1
fi
if grep -q hypervisor /proc/cpuinfo ; then
echo >&2 "Warning: branch profiling may not be functional in VMs"
fi
case `egrep -q "^cpu family\\s*: 6" /proc/cpuinfo &&
egrep "^model\\s*:" /proc/cpuinfo | head -n1` in
''')

    # One case arm per event: every model but the last is written as a
    # "pattern|" continuation line, the last one closes the pattern list
    # and assigns the event.
    for perf_event, models in eventmap.items():
        for model_id in models[:-1]:
            emit("model*:\\ %s|\\\n" % model_id)
        emit('model*:\\ %s) E="%s$FLAGS" ;;\n' % (models[-1], perf_event))

    # Default arm, end of the case statement, and the perf invocation with
    # its retry that strips the "p" modifier.
    emit('''*)
echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
exit 1 ;;
esac
set -x
if ! perf record -e $E -b "$@" ; then
 # PEBS may not actually be working even if the processor supports it
 # (e.g., in a virtual machine). Trying to run without /p.
 set +x
 echo >&2 "Retrying without /p."
 E="$(echo "${E}" | sed -e 's/\\/p/\\//')"
 set -x
 exec perf record -e $E -b "$@"
 set +x
fi
''')
# Turn an empty result into a failing exit status: first when the local CPU
# had no mapfile entry, then when no branch event was found at all.
if not args.all and cpufound == 0:
    sys.exit('CPU %s not found' % cpu)
if not found:
    sys.exit('Branch event not found')
|