summaryrefslogtreecommitdiff
path: root/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
diff options
context:
space:
mode:
Diffstat (limited to 'dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch')
-rw-r--r--dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch65
1 files changed, 0 insertions, 65 deletions
diff --git a/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch b/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
deleted file mode 100644
index b46e57301782..000000000000
--- a/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-commit bdfc5375f219d6def81effda4e57cb56d01fc917
-Author: Sergei Trofimovich <slyfox@gentoo.org>
-Date: Tue Aug 30 12:10:54 2016 +0100
-
- rts: enable parallel GC scan of large (32M+) allocation area
-
- Parallel GC does not scan large allocation area (-A)
- effectively as it does not do work stealing from nursery
- by default.
-
- That leads to large imbalance when only one of threads
- overflows allocation area: most of GC threads finish
- quickly (as there is not much to collect) and sit idle
- waiting while single GC thread finishes scan of single
- allocation area for that thread.
-
- The patch enables work stealing for (equivalent of -qb0)
- allocation area of -A32M or higher.
-
- Tested on a highlighting-kate package from Trac #9221
-
- On 8-core machine the difference is around 5% faster
- of wall-clock time. On 24-core VM the speedup is 20%.
-
- Signed-off-by: Sergei Trofimovich <siarheit@google.com>
-
- Test Plan: measured wall time and GC parallelism on highlighting-kate build
-
- Reviewers: austin, bgamari, erikd, simonmar
-
- Reviewed By: bgamari, simonmar
-
- Subscribers: thomie
-
- Differential Revision: https://phabricator.haskell.org/D2483
-
- GHC Trac Issues: #9221
-
-diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
-index fda33f0..7a719b9 100644
---- a/rts/RtsFlags.c
-+++ b/rts/RtsFlags.c
-@@ -237,1 +237,1 @@ void initRtsFlagsDefaults(void)
-- RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
-+ RtsFlags.ParFlags.parGcLoadBalancingGen = ~0u; /* auto, based on -A */
-@@ -1398,2 +1390,19 @@ static void normaliseRtsOpts (void)
- }
-
-+#ifdef THREADED_RTS
-+ if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
-+ StgWord alloc_area_bytes
-+ = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;
-+
-+ // If allocation area is larger that CPU cache
-+ // we can finish scanning quicker doing work-stealing
-+ // scan. Trac #9221
-+ // 32M looks big enough not to fit into L2 cache
-+ // of popular modern CPUs.
-+ if (alloc_area_bytes >= 32 * 1024 * 1024) {
-+ RtsFlags.ParFlags.parGcLoadBalancingGen = 0;
-+ } else {
-+ RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
-+ }
-+ }
-+#endif