From 9ab4e56e94d56848a3e883e4e1610986a523dbe5 Mon Sep 17 00:00:00 2001 From: Eric Andersen Date: Wed, 2 Aug 2006 21:19:09 +0000 Subject: Add support for mklibs (strips unused syms from shared libs such as uClibc) based on a patch from akvadrako, and using a version of mklibs.py that was massively hacked up by andersee and mjn3 for uClibc support. --- toolchain/mklibs/Config.in | 6 + toolchain/mklibs/mklibs.mk | 49 ++++ toolchain/mklibs/mklibs.py | 597 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 652 insertions(+) create mode 100644 toolchain/mklibs/Config.in create mode 100644 toolchain/mklibs/mklibs.mk create mode 100644 toolchain/mklibs/mklibs.py (limited to 'toolchain/mklibs') diff --git a/toolchain/mklibs/Config.in b/toolchain/mklibs/Config.in new file mode 100644 index 000000000..faf0ab46a --- /dev/null +++ b/toolchain/mklibs/Config.in @@ -0,0 +1,6 @@ +config BR2_MKLIBS + bool "Run mklibs on the built root filesystem" + default n + help + Recompiles all the shared libraries to only include the + symbols actually needed to run the binaries on the target diff --git a/toolchain/mklibs/mklibs.mk b/toolchain/mklibs/mklibs.mk new file mode 100644 index 000000000..b7010176a --- /dev/null +++ b/toolchain/mklibs/mklibs.mk @@ -0,0 +1,49 @@ +###################################################################### +# +# mklibs +# +###################################################################### +MKLIBS_PROGRAM:=$(STAGING_DIR)/bin/mklibs.py + +$(MKLIBS_PROGRAM): toolchain/mklibs/mklibs.py + cp -a toolchain/mklibs/mklibs.py $@ + +mklibs-clean: + rm -f $(MKLIBS_PROGRAM) + +mklibs-dirclean: + true + +############################################################# +# +# Run mklibs +# +############################################################# +MKLIBS_PYTHON:=$(shell which python) +ifeq ($(MKLIBS_PYTHON),) + MKLIBS_PYTHON=/usr/bin/python +endif + +$(STAGING_DIR)/mklibs-stamp: $(MKLIBS_PROGRAM) $(MKLIBS_PYTHON) $(STAGING_DIR)/lib/* + find $(TARGET_DIR) 
-type f -perm +100 -exec \ + file -r -N -F '' {} + | \ + awk ' /executable.*dynamically/ { print $$1 }' > $(STAGING_DIR)/mklibs-progs + cd $(TARGET_DIR); PATH=$(PATH):$(STAGING_DIR)/bin $(MKLIBS_PYTHON) $(MKLIBS_PROGRAM) \ + --target $(REAL_GNU_TARGET_NAME) --root $(STAGING_DIR) -d ./ \ + `cat $(STAGING_DIR)/mklibs-progs` + touch $@ + +# this empty target allows a mklibs dependency to be included in the +# target targets, but it will only be invoked if BR2_MKLIBS is conf'ed +.PHONY: mklibs +mklibs: + +############################################################# +# +# Toplevel Makefile options +# +############################################################# + +ifeq ($(strip $(BR2_MKLIBS)),y) +mklibs: $(STAGING_DIR)/mklibs-stamp +endif diff --git a/toolchain/mklibs/mklibs.py b/toolchain/mklibs/mklibs.py new file mode 100644 index 000000000..a84fd42fa --- /dev/null +++ b/toolchain/mklibs/mklibs.py @@ -0,0 +1,597 @@ +#! /usr/bin/python + +# mklibs.py: An automated way to create a minimal /lib/ directory. +# +# Copyright 2001 by Falk Hueffner +# & Goswin Brederlow +# +# mklibs.sh by Marcus Brinkmann +# used as template +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# HOW IT WORKS +# +# - Gather all unresolved symbols and libraries needed by the programs +# and reduced libraries +# - Gather all symbols provided by the already reduced libraries +# (none on the first pass) +# - If all symbols are provided we are done +# - go through all libraries and remember what symbols they provide +# - go through all unresolved/needed symbols and mark them as used +# - for each library: +# - find pic file (if not present copy and strip the so) +# - compile in only used symbols +# - strip +# - back to the top + +# TODO +# * complete argument parsing as given as comment in main + +import commands +import string +import re +import sys +import os +import glob +import getopt +from stat import * + +DEBUG_NORMAL = 1 +DEBUG_VERBOSE = 2 +DEBUG_SPAM = 3 + +debuglevel = DEBUG_NORMAL + +def debug(level, *msg): + if debuglevel >= level: + print string.join(msg) + +# A simple set class. It should be replaced with the standard sets.Set +# type as soon as Python 2.3 is out. 
class Set:
    # Minimal set of hashable objects, stored as the keys of a private
    # dictionary.  Only the operations this script needs are provided.

    def __init__(self):
        self.__dict = {}

    # Insert obj; adding an element twice is harmless.
    def add(self, obj):
        self.__dict[obj] = 1

    # Return a true value if obj is a member of the set.
    def contains(self, obj):
        return obj in self.__dict

    # Add every element of the Set `s` to this set.
    def merge(self, s):
        for e in s.elems():
            self.add(e)

    # Return the members as a new list (in no particular order).
    def elems(self):
        return list(self.__dict.keys())

    # Return the number of members.
    def size(self):
        return len(self.__dict)

    # Two Sets are equal when they hold the same members.
    def __eq__(self, other):
        return self.__dict == other.__dict

    # Keep "!=" consistent with "==": without this, Python 2 falls back
    # to its default comparison for "!=".
    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return repr(self.elems())

    def __repr__(self):
        return repr(self.elems())

# Run `command` with the given argument strings through the shell and
# return its output as a list of lines.  If the command exits with a
# non-zero status, the status and the output are printed and the script
# terminates with exit code 1.
def command(command, *args):
    joined_args = ' '.join(args)
    debug(DEBUG_SPAM, "calling", command, joined_args)
    (status, output) = commands.getstatusoutput(command + ' ' + joined_args)
    exit_status = os.WEXITSTATUS(status)
    if exit_status != 0:
        # Written through sys.stdout (instead of the print statement) so
        # that this helper does not depend on Python 2 only syntax.
        sys.stdout.write("Command failed with status %s : %s %s\n"
                         % (exit_status, command, joined_args))
        sys.stdout.write("With output: " + output + "\n")
        sys.exit(1)
    return output.split('\n')

# Filter a list according to a regexp containing a () group. Return
# a Set holding group number `groupnr` of every element the regexp
# matches (matching is anchored at the start of each element).
def regexpfilter(list, regexp, groupnr = 1):
    pattern = re.compile(regexp)
    result = Set()
    for x in list:
        match = pattern.match(x)
        if match:
            result.add(match.group(groupnr))

    return result

# Return the lines printed by "<target>objdump --private-headers obj".
# Raises IOError if obj does not exist.  (A real exception class is
# used because string exceptions are rejected by Python >= 2.6.)
def _private_headers(obj):
    if not os.access(obj, os.F_OK):
        raise IOError("Cannot find lib: " + obj)
    return command(target + "objdump", "--private-headers", obj)

# Return a list of rpath strings for the passed object, each prefixed
# with the global `root` directory.  The list is empty when the object
# has no RPATH entry.  Raises IOError if obj does not exist.
def rpath(obj):
    output = _private_headers(obj)
    return [root + "/" + path
            for path in regexpfilter(output, r".*RPATH\s*(\S+)$").elems()]

# Return a Set of libraries the passed objects depend on (the NEEDED
# entries reported by objdump).  Raises IOError if obj does not exist.
def library_depends(obj):
    output = _private_headers(obj)
    return regexpfilter(output, r".*NEEDED\s*(\S+)$")

# Return a list of libraries the passed objects depend on. The
# libraries are in "-lfoo" format suitable for passing to gcc.
# Return the libraries `obj` depends on as one string of "-lfoo" options
# suitable for passing to gcc ("" when it has no "libfoo.so" NEEDED
# entries).  Raises IOError if obj does not exist.
def library_depends_gcc_libnames(obj):
    if not os.access(obj, os.F_OK):
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    names = regexpfilter(output, r".*NEEDED\s*lib(\S+)\.so.*$").elems()
    if not names:
        return ""
    return "-l" + " -l".join(names)

# Scan readelf output.  Example:
#   Num:    Value          Size Type    Bind   Vis      Ndx Name
#     1: 000000012002ab48   168 FUNC    GLOBAL DEFAULT  UND strchr@GLIBC_2.0 (2)
# The three groups are the binding, the section index and the symbol
# name with any "@VERSION" suffix cut off.
symline_regexp = \
    re.compile(r"\s*\d+: .+\s+\d+\s+\w+\s+(\w+)+\s+\w+\s+(\w+)\s+([^\s@]+)")

# Return undefined symbols in an object as a Set of tuples (name, weakness).
# Raises IOError if obj does not exist.
def undefined_symbols(obj):
    if not os.access(obj, os.F_OK):
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    for line in command(target + "readelf", "-s", "-W", obj):
        match = symline_regexp.match(line)
        if not match:
            continue
        bind, ndx, name = match.groups()
        if ndx == "UND":
            result.add((name, bind == "WEAK"))
    return result

# Return a Set of symbols provided by a library: every non-LOCAL symbol
# whose section index is neither UND nor ABS.  Raises IOError if obj
# does not exist.
def provided_symbols(obj):
    if not os.access(obj, os.F_OK):
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    debug(DEBUG_SPAM, "provided_symbols result = ", repr(result))
    for line in command(target + "readelf", "-s", "-W", obj):
        match = symline_regexp.match(line)
        if not match:
            continue
        bind, ndx, name = match.groups()
        if bind != "LOCAL" and ndx not in ("UND", "ABS"):
            debug(DEBUG_SPAM, "provided_symbols adding ", repr(name))
            result.add(name)
    return result

# Return real target of a symlink, following chains of links.
def resolve_link(file):
    debug(DEBUG_SPAM, "resolving", file)
    while S_ISLNK(os.lstat(file)[ST_MODE]):
        # os.path.join() returns the link target unchanged when it is
        # absolute, and resolves it against the link's directory otherwise.
        file = os.path.join(os.path.dirname(file), os.readlink(file))
    debug(DEBUG_SPAM, "resolved to", file)
    return file

# Find complete path of a library, by searching in lib_path.
# Returns "" when no directory in lib_path contains it.
def find_lib(lib):
    for path in lib_path:
        candidate = path + "/" + lib
        if os.access(candidate, os.F_OK):
            return candidate

    return ""

# Look in lib_path for "<base><suffix>", where <base> is the library
# name up to (not including) ".so".  Returns the link-resolved path of
# the first hit, or "" when nothing is found or `lib` does not look
# like a shared library name.
def _find_pic_file(lib, suffix):
    match = so_pattern.match(lib)
    if not match:
        return ""
    base_name = match.group(1)
    for path in lib_path:
        for file in glob.glob(path + "/" + base_name + suffix):
            if os.access(file, os.F_OK):
                return resolve_link(file)
    return ""

# Find a PIC archive for the library ("" if there is none)
def find_pic(lib):
    return _find_pic_file(lib, "_pic.a")

# Find a PIC .map file for the library ("" if there is none)
def find_pic_map(lib):
    return _find_pic_file(lib, "_pic.map")

# Return the SONAME recorded in so_file according to readelf, or "" if
# none is reported.
def extract_soname(so_file):
    soname_data = regexpfilter(command(target + "readelf", "--all", "-W", so_file),
                               r".*SONAME.*\[(.*)\].*")
    names = soname_data.elems()
    if names:
        return names[0]

    return ""

# Print the usage message and exit with status `was_err`.  The text goes
# to stderr when was_err is true and to stdout otherwise.
def usage(was_err):
    if was_err:
        outfd = sys.stderr
    else:
        outfd = sys.stdout
    lines = [
        "Usage: mklibs [OPTION]... -d DEST FILE ...",
        "Make a set of minimal libraries for FILE(s) in DEST.",
        "",
        " -d, --dest-dir DIRECTORY create libraries in DIRECTORY",
        " -D, --no-default-lib omit default libpath ( "
            + " : ".join(default_lib_path) + " )",
        " -L DIRECTORY[:DIRECTORY]... add DIRECTORY(s) to the library search path",
        " --ldlib LDLIB use LDLIB for the dynamic linker",
        " --libc-extras-dir DIRECTORY look for libc extra files in DIRECTORY",
        # Ugh... Adding the trailing '-' breaks common practice, so TARGET
        # is used verbatim as the prefix (it must include any trailing '-').
        " --target TARGET prepend TARGET to the gcc and binutils calls",
        " --root ROOT search in ROOT for library rpaths",
        " -v, --verbose explain what is being done",
        " -V, --version print the version number and exit",
        " -h, --help display this help and exit",
    ]
    outfd.write("\n".join(lines) + "\n")
    sys.exit(was_err)

# Print the version banner followed by an empty line.
def version(vers):
    sys.stdout.write("mklibs: version  " + str(vers) + "\n")
    sys.stdout.write("\n")

#################### main ####################
## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
## Make a set of minimal libraries for FILE ... in directory DEST.
##
## Options:
##   -L DIRECTORY               Add DIRECTORY to library search path.
##   -D, --no-default-lib       Do not use default lib directories of /lib:/usr/lib
##   -n, --dry-run              Don't actually run any commands; just print them.
##   -v, --verbose              Print additional progress information.
##   -V, --version              Print the version number and exit.
##   -h, --help                 Print this help and exit.
##   --ldlib                    Name of dynamic linker (overrides environment variable ldlib)
##   --libc-extras-dir          Directory for libc extra files
##   --target                   Use as prefix for gcc or binutils calls
##
##   -d, --dest-dir DIRECTORY   Create libraries in DIRECTORY.
##
## Required arguments for long options are also mandatory for the short options.
+ +# Clean the environment +vers="0.12 with uClibc fixes" +os.environ['LC_ALL'] = "C" + +# Argument parsing +opts = "L:DnvVhd:r:" +longopts = ["no-default-lib", "dry-run", "verbose", "version", "help", + "dest-dir=", "ldlib=", "libc-extras-dir=", "target=", "root="] + +# some global variables +lib_rpath = [] +lib_path = [] +dest_path = "DEST" +ldlib = "LDLIB" +include_default_lib_path = "yes" +default_lib_path = ["/lib/", "/usr/lib/", "/usr/X11R6/lib/"] +libc_extras_dir = "/usr/lib/libc_pic" +target = "" +root = "" +so_pattern = re.compile("((lib|ld).*)\.so(\..+)*") +script_pattern = re.compile("^#!\s*/") + +try: + optlist, proglist = getopt.getopt(sys.argv[1:], opts, longopts) +except getopt.GetoptError, msg: + print >> sys.stderr, msg + usage(1) + +for opt, arg in optlist: + if opt in ("-v", "--verbose"): + if debuglevel < DEBUG_SPAM: + debuglevel = debuglevel + 1 + elif opt == "-L": + lib_path.extend(string.split(arg, ":")) + elif opt in ("-d", "--dest-dir"): + dest_path = arg + elif opt in ("-D", "--no-default-lib"): + include_default_lib_path = "no" + elif opt == "--ldlib": + ldlib = arg + elif opt == "--libc-extras-dir": + libc_extras_dir = arg + elif opt == "--target": + #target = arg + "-" + target = arg + elif opt in ("-r", "--root"): + root = arg + elif opt in ("--help", "-h"): + usage(0) + sys.exit(0) + elif opt in ("--version", "-V"): + version(vers) + sys.exit(0) + else: + print "WARNING: unknown option: " + opt + "\targ: " + arg + +if include_default_lib_path == "yes": + lib_path.extend(default_lib_path) + +if ldlib == "LDLIB": + ldlib = os.getenv("ldlib") + +objects = {} # map from inode to filename +for prog in proglist: + inode = os.stat(prog)[ST_INO] + if objects.has_key(inode): + debug(DEBUG_SPAM, prog, "is a hardlink to", objects[inode]) + elif so_pattern.match(prog): + debug(DEBUG_SPAM, prog, "is a library") + elif script_pattern.match(open(prog).read(256)): + debug(DEBUG_SPAM, prog, "is a script") + else: + objects[inode] = prog + +if not 
ldlib: + pattern = re.compile(".*Requesting program interpreter:.*/([^\]/]+).*") + for obj in objects.values(): + output = command(target + "readelf", "--program-headers", obj) + for x in output: + match = pattern.match(x) + if match: + ldlib = match.group(1) + break + if ldlib: + break + +if not ldlib: + sys.exit("E: Dynamic linker not found, aborting.") + +debug(DEBUG_NORMAL, "I: Using", ldlib, "as dynamic linker.") + +pattern = re.compile(".*ld-uClibc.*"); +if pattern.match(ldlib): + uclibc = 1 +else: + uclibc = 0 + +# Check for rpaths +for obj in objects.values(): + rpath_val = rpath(obj) + if rpath_val: + if root: + if debuglevel >= DEBUG_VERBOSE: + print "Adding rpath " + string.join(rpath_val, ":") + " for " + obj + lib_rpath.extend(rpath_val) + else: + print "warning: " + obj + " may need rpath, but --root not specified" + +lib_path.extend(lib_rpath) + +passnr = 1 +previous_pass_unresolved = Set() +while 1: + debug(DEBUG_NORMAL, "I: library reduction pass", `passnr`) + if debuglevel >= DEBUG_VERBOSE: + print "Objects:", + for obj in objects.values(): + print obj[string.rfind(obj, '/') + 1:], + print + + passnr = passnr + 1 + # Gather all already reduced libraries and treat them as objects as well + small_libs = [] + for lib in regexpfilter(os.listdir(dest_path), "(.*-so-stripped)$").elems(): + obj = dest_path + "/" + lib + small_libs.append(obj) + inode = os.stat(obj)[ST_INO] + if objects.has_key(inode): + debug(DEBUG_SPAM, obj, "is hardlink to", objects[inode]) + else: + objects[inode] = obj + + # DEBUG + for obj in objects.values(): + small_libs.append(obj) + debug(DEBUG_VERBOSE, "Object:", obj) + + # calculate what symbols and libraries are needed + needed_symbols = Set() # Set of (name, weakness-flag) + libraries = Set() + for obj in objects.values(): + needed_symbols.merge(undefined_symbols(obj)) + libraries.merge(library_depends(obj)) + + # FIXME: on i386 this is undefined but not marked UND + # I don't know how to detect those symbols but this seems 
+ # to be the only one and including it on alpha as well + # doesn't hurt. I guess all archs can live with this. + needed_symbols.add(("sys_siglist", 1)) + + # calculate what symbols are present in small_libs + present_symbols = Set() + for lib in small_libs: + present_symbols.merge(provided_symbols(lib)) + + # are we finished? + using_ctor_dtor = 0 + num_unresolved = 0 + present_symbols_elems = present_symbols.elems() + unresolved = Set() + for (symbol, is_weak) in needed_symbols.elems(): + if not symbol in present_symbols_elems: + debug(DEBUG_SPAM, "Still need:", symbol, `is_weak`) + unresolved.add((symbol, is_weak)) + num_unresolved = num_unresolved + 1 + + debug (DEBUG_NORMAL, `needed_symbols.size()`, "symbols,", + `num_unresolved`, "unresolved") + + if num_unresolved == 0: + break + + if unresolved == previous_pass_unresolved: + # No progress in last pass. Verify all remaining symbols are weak. + for (symbol, is_weak) in unresolved.elems(): + if not is_weak: + raise "Unresolvable symbol " + symbol + break + + previous_pass_unresolved = unresolved + + library_symbols = {} + library_symbols_used = {} + symbol_provider = {} + + # Calculate all symbols each library provides + for library in libraries.elems(): + path = find_lib(library) + if not path: + sys.exit("Library not found: " + library + " in path: " + + string.join(lib_path, " : ")) + symbols = provided_symbols(path) + library_symbols[library] = Set() + library_symbols_used[library] = Set() + for symbol in symbols.elems(): + if symbol_provider.has_key(symbol): + # in doubt, prefer symbols from libc + if re.match("^libc[\.-]", library): + library_symbols[library].add(symbol) + symbol_provider[symbol] = library + else: + debug(DEBUG_SPAM, "duplicate symbol", symbol, "in", + symbol_provider[symbol], "and", library) + else: + library_symbols[library].add(symbol) + symbol_provider[symbol] = library + + # Fixup support for constructors and destructors + if symbol_provider.has_key("_init"): + debug(DEBUG_VERBOSE, 
library, ": Library has a constructor!"); + using_ctor_dtor = 1 + library_symbols[library].add("_init") + symbol_provider["_init"] = library + library_symbols_used[library].add("_init") + + if symbol_provider.has_key("_fini"): + debug(DEBUG_VERBOSE, library, ": Library has a destructor!"); + using_ctor_dtor = 1 + library_symbols[library].add("_fini") + symbol_provider["_fini"] = library + library_symbols_used[library].add("_fini") + + # which symbols are actually used from each lib + for (symbol, is_weak) in needed_symbols.elems(): + if not symbol_provider.has_key(symbol): + if not is_weak: + if not uclibc or (symbol != "main"): + raise "No library provides non-weak " + symbol + else: + lib = symbol_provider[symbol] + library_symbols_used[lib].add(symbol) + + # reduce libraries + for library in libraries.elems(): + debug(DEBUG_VERBOSE, "reducing", library) + debug(DEBUG_SPAM, "using: " + string.join(library_symbols_used[library].elems())) + so_file = find_lib(library) + if root and (re.compile("^" + root).search(so_file)): + debug(DEBUG_VERBOSE, "no action required for " + so_file) + continue + so_file_name = os.path.basename(so_file) + if not so_file: + sys.exit("File not found:" + library) + pic_file = find_pic(library) + if not pic_file: + # No pic file, so we have to use the .so file, no reduction + debug(DEBUG_VERBOSE, "No pic file found for", so_file, "; copying") + command(target + "objcopy", "--strip-unneeded -R .note -R .comment", + so_file, dest_path + "/" + so_file_name + "-so-stripped") + else: + # we have a pic file, recompile + debug(DEBUG_SPAM, "extracting from:", pic_file, "so_file:", so_file) + soname = extract_soname(so_file) + if soname == "": + debug(DEBUG_VERBOSE, so_file, " has no soname, copying") + continue + debug(DEBUG_SPAM, "soname:", soname) + base_name = so_pattern.match(library).group(1) + # libc needs its soinit.o and sofini.o as well as the pic + if (base_name == "libc") and not uclibc: + # force dso_handle.os to be included, 
otherwise reduced libc + # may segfault in ptmalloc_init due to undefined weak reference + extra_flags = find_lib(ldlib) + " -u __dso_handle" + extra_pre_obj = libc_extras_dir + "/soinit.o" + extra_post_obj = libc_extras_dir + "/sofini.o" + else: + extra_flags = "" + extra_pre_obj = "" + extra_post_obj = "" + map_file = find_pic_map(library) + if map_file: + extra_flags = extra_flags + " -Wl,--version-script=" + map_file + if library_symbols_used[library].elems(): + joined_symbols = "-u" + string.join(library_symbols_used[library].elems(), " -u") + else: + joined_symbols = "" + if using_ctor_dtor == 1: + extra_flags = extra_flags + " -shared" + # compile in only used symbols + command(target + "gcc", + "-nostdlib -nostartfiles -shared -Wl,-soname=" + soname,\ + joined_symbols, \ + "-o", dest_path + "/" + so_file_name + "-so", \ + extra_pre_obj, \ + pic_file, \ + extra_post_obj, \ + extra_flags, \ + "-lgcc -L", dest_path, \ + "-L" + string.join(lib_path, " -L"), \ + library_depends_gcc_libnames(so_file)) + # strip result + command(target + "objcopy", "--strip-unneeded -R .note -R .comment", + dest_path + "/" + so_file_name + "-so", + dest_path + "/" + so_file_name + "-so-stripped") + ## DEBUG + debug(DEBUG_VERBOSE, so_file, "\t", `os.stat(so_file)[ST_SIZE]`) + debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so", "\t", + `os.stat(dest_path + "/" + so_file_name + "-so")[ST_SIZE]`) + debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so-stripped", + "\t", `os.stat(dest_path + "/" + so_file_name + "-so-stripped")[ST_SIZE]`) + +# Finalising libs and cleaning up +for lib in regexpfilter(os.listdir(dest_path), "(.*)-so-stripped$").elems(): + os.rename(dest_path + "/" + lib + "-so-stripped", dest_path + "/" + lib) +for lib in regexpfilter(os.listdir(dest_path), "(.*-so)$").elems(): + os.remove(dest_path + "/" + lib) + +# Canonicalize library names. 
+for lib in regexpfilter(os.listdir(dest_path), "(.*so[.\d]*)$").elems(): + this_lib_path = dest_path + "/" + lib + if os.path.islink(this_lib_path): + debug(DEBUG_VERBOSE, "Unlinking %s." % lib) + os.remove(this_lib_path) + continue + soname = extract_soname(this_lib_path) + if soname: + debug(DEBUG_VERBOSE, "Moving %s to %s." % (lib, soname)) + os.rename(dest_path + "/" + lib, dest_path + "/" + soname) + +# Make sure the dynamic linker is present and is executable +ld_file = find_lib(ldlib) +ld_file_name = os.path.basename(ld_file) + +if not os.access(dest_path + "/" + ld_file_name, os.F_OK): + debug(DEBUG_NORMAL, "I: stripping and copying dynamic linker.") + command(target + "objcopy", "--strip-unneeded -R .note -R .comment", + ld_file, dest_path + "/" + ld_file_name) + +os.chmod(dest_path + "/" + ld_file_name, 0755) -- cgit v1.2.3