diff options
author | V3n3RiX <venerix@redcorelinux.org> | 2017-10-09 18:53:29 +0100 |
---|---|---|
committer | V3n3RiX <venerix@redcorelinux.org> | 2017-10-09 18:53:29 +0100 |
commit | 4f2d7949f03e1c198bc888f2d05f421d35c57e21 (patch) | |
tree | ba5f07bf3f9d22d82e54a462313f5d244036c768 /eclass/multiprocessing.eclass |
reinit the tree, so we can have metadata
Diffstat (limited to 'eclass/multiprocessing.eclass')
-rw-r--r-- | eclass/multiprocessing.eclass | 313 |
1 files changed, 313 insertions, 0 deletions
# Copyright 1999-2014 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

# @ECLASS: multiprocessing.eclass
# @MAINTAINER:
# base-system@gentoo.org
# @AUTHOR:
# Brian Harring <ferringb@gentoo.org>
# Mike Frysinger <vapier@gentoo.org>
# @BLURB: parallelization with bash (wtf?)
# @DESCRIPTION:
# The multiprocessing eclass contains a suite of functions that allow ebuilds
# to quickly run things in parallel using shell code.
#
# It has two modes: pre-fork and post-fork. If you don't want to dive into any
# more nuts & bolts, just use the pre-fork mode. For main threads that mostly
# spawn children and then wait for them to finish, use the pre-fork mode. For
# main threads that do a bit of processing themselves, use the post-fork mode.
# You may mix & match them for longer computation loops.
# @EXAMPLE:
#
# @CODE
# # First initialize things:
# multijob_init
#
# # Then hash a bunch of files in parallel.  The output goes to a separate
# # file; redirecting onto the input would truncate it before it is hashed.
# for n in {0..20} ; do
# 	multijob_child_init md5sum data.${n} > data.${n}.md5
# done
#
# # Then wait for all the children to finish:
# multijob_finish
# @CODE

if [[ -z ${_MULTIPROCESSING_ECLASS} ]]; then
_MULTIPROCESSING_ECLASS=1

# @FUNCTION: bashpid
# @DESCRIPTION:
# Return the process id of the current sub shell. This is to support bash
# versions older than 4.0 that lack $BASHPID support natively. Simply do:
# echo ${BASHPID:-$(bashpid)}
#
# Note: Using this func in any other way than the one above is not supported.
bashpid() {
	# Running bashpid plainly will return incorrect results. This func must
	# be run in a subshell of the current subshell to get the right pid.
	# i.e. This will show the wrong value:
	#   bashpid
	# But this will show the right value:
	#   (bashpid)
	# The spawned sh reports its parent, which is the subshell that ran us.
	sh -c 'echo ${PPID}'
}

# @FUNCTION: get_nproc
# @USAGE: [${fallback:-1}]
# @DESCRIPTION:
# Attempt to figure out the number of processing units available.
# If the value can not be determined, prints the provided fallback
# instead. If no fallback is provided, defaults to 1.
get_nproc() {
	local nproc

	# GNU
	if type -P nproc &>/dev/null; then
		nproc=$(nproc)
	fi

	# BSD
	if [[ -z ${nproc} ]] && type -P sysctl &>/dev/null; then
		nproc=$(sysctl -n hw.ncpu 2>/dev/null)
	fi

	# fallback to python2.6+
	# note: this may fail (raise NotImplementedError)
	if [[ -z ${nproc} ]] && type -P python &>/dev/null; then
		nproc=$(python -c 'import multiprocessing; print(multiprocessing.cpu_count());' 2>/dev/null)
	fi

	# Each probe above is only tried while the previous ones produced nothing.
	if [[ -n ${nproc} ]]; then
		echo "${nproc}"
	else
		echo "${1:-1}"
	fi
}

# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}] [${inf:-999}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein. Useful for running non-make tools in parallel too.
# i.e. if the user has MAKEOPTS=-j9, this will echo "9" -- we can't return the
# number as bash normalizes it to [0, 255]. If the flags haven't specified a
# -j flag, then "1" is shown as that is the default `make` uses. Since there's
# no way to represent infinity, we return ${inf} (defaults to 999) if the user
# has -j without a number.
makeopts_jobs() {
	[[ $# -eq 0 ]] && set -- "${MAKEOPTS}"
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	# The input is padded with spaces so every flag is delimited by
	# whitespace on both sides, even at the start or end of the string.
	local jobs=$(echo " $* " | sed -r -n \
		-e 's:.*[[:space:]](-[a-z]*j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e "s:.*[[:space:]](-[a-z]*j|--jobs)[[:space:]].*:${2:-999}:p")
	echo ${jobs:-1}
}

# @FUNCTION: makeopts_loadavg
# @USAGE: [${MAKEOPTS}] [${inf:-999}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the value set
# for load-average. For make and ninja based builds this will mean new jobs are
# not only limited by the jobs-value, but also by the current load - which might
# get excessive due to I/O and not just due to CPU load.
# Be aware that the returned number might be a floating-point number. Test
# whether your software supports that.
# If no limit is specified or --load-average is used without a number, ${inf}
# (defaults to 999) is returned.
makeopts_loadavg() {
	[[ $# -eq 0 ]] && set -- "${MAKEOPTS}"
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	local lavg=$(echo " $* " | sed -r -n \
		-e 's:.*[[:space:]](-[a-z]*l|--(load-average|max-load)[=[:space:]])[[:space:]]*([0-9]+(\.[0-9]+)?)[[:space:]].*:\3:p' \
		-e "s:.*[[:space:]](-[a-z]*l|--(load-average|max-load))[[:space:]].*:${2:-999}:p")
	# Default to ${inf} since the default is to not use a load limit.
	echo ${lavg:-${2:-999}}
}

# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing code in parallel.
# You must call this before any other multijob function.
# Sets the globals mj_read_fd, mj_write_fd, mj_max_jobs and mj_num_jobs.
multijob_init() {
	# When something goes wrong, try to wait for all the children so we
	# don't leave any zombies around.
	has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait "

	# Setup a pipe for children to write their pids to when they finish.
	# We have to allocate two fd's because POSIX has undefined behavior
	# when using one single fd for both read and write. #487056
	# However, opening an fd for read or write only will block until the
	# opposite end is opened as well. Thus we open the first fd for both
	# read and write to not block ourselves, but use it for reading only.
	# The second fd really is opened for write only, as Cygwin supports
	# just one single read fd per FIFO. #583962
	local pipe="${T}/multijob.pipe"
	# Drop any stale node left behind by an interrupted earlier run, and make
	# sure we really got a FIFO: the "<>" redirect below would otherwise
	# silently create a regular file and the first read would hit EOF.
	rm -f "${pipe}"
	mkfifo -m 600 "${pipe}" || die "failed to create fifo ${pipe}"
	redirect_alloc_fd mj_read_fd "${pipe}"
	redirect_alloc_fd mj_write_fd "${pipe}" '>'
	# The open fds keep the FIFO alive; the name is no longer needed.
	rm -f "${pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}

# @FUNCTION: multijob_child_init
# @USAGE: [--pre|--post] [command to run in background]
# @DESCRIPTION:
# This function has two forms. You can use it to execute a simple command
# in the background (and it takes care of everything else), or you must
# call this first thing in your forked child process.
#
# The --pre/--post options allow you to select the child generation mode.
#
# @CODE
# # 1st form: pass the command line as arguments:
# multijob_child_init ls /dev
# # Or if you want to use pre/post fork modes:
# multijob_child_init --pre ls /dev
# multijob_child_init --post ls /dev
#
# # 2nd form: execute multiple stuff in the background (post fork):
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# multijob_post_fork
#
# # 2nd form: execute multiple stuff in the background (pre fork):
# multijob_pre_fork
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# @CODE
multijob_child_init() {
	local mode="pre"
	case $1 in
	--pre)  mode="pre" ; shift ;;
	--post) mode="post"; shift ;;
	esac

	if [[ $# -eq 0 ]] ; then
		# Child side: on exit, report "<pid> <exit status>" on the shared
		# pipe so that multijob_finish_one can pick it up.
		trap 'echo ${BASHPID:-$(bashpid)} $? >&'${mj_write_fd} EXIT
		trap 'exit 1' INT TERM
	else
		# Parent side: do the book keeping around forking the command.
		# The return value is that of the pre/post fork call.
		local ret
		[[ ${mode} == "pre" ]] && { multijob_pre_fork; ret=$?; }
		( multijob_child_init ; "$@" ) &
		[[ ${mode} == "post" ]] && { multijob_post_fork; ret=$?; }
		return ${ret}
	fi
}

# @FUNCTION: _multijob_fork
# @INTERNAL
# @DESCRIPTION:
# Do the actual book keeping.  Takes exactly one argument, "pre" or "post",
# which decides whether the job counter is bumped after or before checking
# the limit.  Returns the status of the reaped child if one had to be
# waited for, 0 otherwise.
_multijob_fork() {
	[[ $# -eq 1 ]] || die "incorrect number of arguments"

	local ret=0
	[[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		multijob_finish_one
		ret=$?
	fi
	[[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_pre_fork
# @DESCRIPTION:
# You must call this in the parent process before forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return its exit status.
multijob_pre_fork() { _multijob_fork pre "$@" ; }

# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# You must call this in the parent process after forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return its exit status.
multijob_post_fork() { _multijob_fork post "$@" ; }

# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Wait for a single process to exit and return its exit code.
multijob_finish_one() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	# Blocks until some child writes its "<pid> <status>" line on exit.
	local pid ret
	read -r -u ${mj_read_fd} pid ret || die
	: $(( --mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Wait for all pending processes to exit and return the bitwise or
# of all their exit codes.
multijob_finish() {
	local ret=0
	while [[ ${mj_num_jobs} -gt 0 ]] ; do
		multijob_finish_one
		: $(( ret |= $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait

	# Do this after reaping all the children.
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	# No need to hook anymore.
	EBUILD_DEATH_HOOKS=${EBUILD_DEATH_HOOKS/ wait / }

	return ${ret}
}

# @FUNCTION: redirect_alloc_fd
# @USAGE: <var> <file> [redirection]
# @DESCRIPTION:
# Find a free fd and redirect the specified file via it. Store the new
# fd in the specified variable. Useful for the cases where we don't care
# about the exact fd #.  The redirection operator defaults to "<>".
#
# Note: <var> must not be one of this function's own locals (var, file,
# redir, fd), or the result is stored in the local and lost on return.
redirect_alloc_fd() {
	local var=$1 file=$2 redir=${3:-"<>"}

	# Make sure /dev/fd is sane on Linux hosts. #479656
	if [[ ! -L /dev/fd && ${CBUILD} == *linux* ]] ; then
		eerror "You're missing a /dev/fd symlink to /proc/self/fd."
		eerror "Please fix the symlink and check your boot scripts (udev/etc...)."
		die "/dev/fd is broken"
	fi

	# The file name is expanded by eval itself ("\${file}") rather than being
	# spliced into the command string, so paths containing quotes or other
	# shell metacharacters are handled safely.
	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
		# Newer bash provides this functionality.
		eval "exec {${var}}${redir}\"\${file}\""
	else
		# Need to provide the functionality ourselves.
		local fd=10
		while :; do
			# Make sure the fd isn't open. It could be a char device,
			# or a symlink (possibly broken) to something else.
			if [[ ! -e /dev/fd/${fd} ]] && [[ ! -L /dev/fd/${fd} ]] ; then
				eval "exec ${fd}${redir}\"\${file}\"" && break
			fi
			[[ ${fd} -gt 1024 ]] && die 'could not locate a free temp fd !?'
			: $(( ++fd ))
		done
		: $(( ${var} = fd ))
	fi
}

fi