diff options
Diffstat (limited to 'dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch')
-rw-r--r-- | dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch b/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch new file mode 100644 index 000000000000..4752f4482678 --- /dev/null +++ b/dev-lang/ghc/files/ghc-8.0.1-ww-args-limit.patch @@ -0,0 +1,127 @@ +commit 5efbf0d243984444cf352ad6f0d147e226c64498 +Author: Sergei Trofimovich <slyfox@gentoo.org> +Date: Thu Sep 1 17:34:58 2016 +0100 + + restore -fmax-worker-args handling (Trac #11565) + + maxWorkerArgs handling was accidentally lost 3 years ago + in a major update of demand analysis + commit 0831a12ea2fc73c33652eeec1adc79fa19700578 + + Old regression is noticeable as: + - code bloat (requires stack reshuffling) + - compilation slowdown (more code to optimise/generate) + - and increased heap usage (DynFlags unboxing/reboxing?) + + On a simple compile benchmark this change causes heap + allocation drop from 70G don to 67G (ghc perf build). + + Signed-off-by: Sergei Trofimovich <siarheit@google.com> + + Reviewers: simonpj, ezyang, goldfire, austin, bgamari + + Reviewed By: simonpj, ezyang + + Subscribers: thomie + + Differential Revision: https://phabricator.haskell.org/D2503 + + GHC Trac Issues: #11565 + +diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs +index 10d5614..7166f57 100644 +--- a/compiler/specialise/SpecConstr.hs ++++ b/compiler/specialise/SpecConstr.hs +@@ -29,7 +29,7 @@ import CoreFVs ( exprsFreeVarsList ) + import CoreMonad + import Literal ( litIsLifted ) + import HscTypes ( ModGuts(..) ) +-import WwLib ( mkWorkerArgs ) ++import WwLib ( isWorkerSmallEnough, mkWorkerArgs ) + import DataCon + import Coercion hiding( substCo ) + import Rules +@@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs + + | Just all_calls <- lookupVarEnv bind_calls fn + = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $ +- do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls +- ++ do { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls + -- Bale out if too many specialisations +- ; let n_pats = length pats ++ ; let pats = filter (is_small_enough . fst) all_pats ++ is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars ++ -- We are about to construct w/w pair in 'spec_one'. ++ -- Omit specialisation leading to high arity workers. ++ -- See Note [Limit w/w arity] ++ n_pats = length pats + spec_count' = n_pats + spec_count + ; case sc_count env of + Just max | not (sc_force env) && spec_count' > max +diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs +index 09bc204..d9460d9 100644 +--- a/compiler/stranal/WwLib.hs ++++ b/compiler/stranal/WwLib.hs +@@ -8,6 +8,7 @@ + + module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs + , deepSplitProductType_maybe, findTypeShape ++ , isWorkerSmallEnough + ) where + + #include "HsVersions.h" +@@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots + wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var + worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args + +- ; if useful1 && not (only_one_void_argument) || useful2 ++ ; if isWorkerSmallEnough dflags work_args ++ && (useful1 && not only_one_void_argument || useful2) + then return (Just (worker_args_dmds, wrapper_body, worker_body)) + else return Nothing + } +@@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots + | otherwise + = False + ++-- See Note [Limit w/w arity] ++isWorkerSmallEnough :: DynFlags -> [Var] -> Bool ++isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags ++ -- We count only Free variables (isId) to skip Type, Kind ++ -- variables which have no runtime representation. ++ + {- + Note [Always do CPR w/w] + ~~~~~~~~~~~~~~~~~~~~~~~~ +@@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR + property, but now doesn't and there a cascade of disaster. A good example + is Trac #5920. + ++Note [Limit w/w arity] ++~~~~~~~~~~~~~~~~~~~~~~~~ ++Guard against high worker arity as it generates a lot of stack traffic. ++A simplified example is Trac #11565#comment:6 ++ ++Current strategy is very simple: don't perform w/w transformation at all ++if the result produces a wrapper with arity higher than -fmax-worker-args=. ++ ++It is a bit all or nothing, consider ++ ++ f (x,y) (a,b,c,d,e ... , z) = rhs ++ ++Currently we will remove all w/w ness entirely. But actually we could ++w/w on the (x,y) pair... it's the huge product that is the problem. ++ ++Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd ++solve f. But we can get a lot of args from deeply-nested products: ++ ++ g (a, (b, (c, (d, ...)))) = rhs ++ ++This is harder to spot on an arg-by-arg basis. Previously mkWwStr was ++given some "fuel" saying how many arguments it could add; when we ran ++out of fuel it would stop w/wing. ++Still not very clever because it had a left-right bias. + + ************************************************************************ + * * |