From 42672d73de4fab730a2d6e91c92d19afebeb40bb Mon Sep 17 00:00:00 2001 From: V3n3RiX Date: Sat, 9 Feb 2019 20:03:56 +0000 Subject: sys-kernel/linux-{image,sources}-redcore-lts : version bump to v4.14.95 && v4.19.20 --- .../files/4.14-0002-BFQ-v8r12-20180404.patch | 4611 ++++++++++++++++++++ 1 file changed, 4611 insertions(+) create mode 100644 sys-kernel/linux-image-redcore-lts/files/4.14-0002-BFQ-v8r12-20180404.patch (limited to 'sys-kernel/linux-image-redcore-lts/files/4.14-0002-BFQ-v8r12-20180404.patch') diff --git a/sys-kernel/linux-image-redcore-lts/files/4.14-0002-BFQ-v8r12-20180404.patch b/sys-kernel/linux-image-redcore-lts/files/4.14-0002-BFQ-v8r12-20180404.patch new file mode 100644 index 00000000..104325d6 --- /dev/null +++ b/sys-kernel/linux-image-redcore-lts/files/4.14-0002-BFQ-v8r12-20180404.patch @@ -0,0 +1,4611 @@ +From 7bd365a925748767d7ed807e5498f90bae0ebc25 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Tue, 14 Nov 2017 08:28:45 +0100 +Subject: [PATCH 01/23] block, bfq-mq: turn BUG_ON on request-size into WARN_ON + +BFQ has many checks of internal and external consistency. One of them +checks that an I/O request has still sectors to serve, if it happens +to be retired without being served. If the request has no sector to +serve, a BUG_ON signals the failure and causes the kernel to +terminate. Yet, from a crash report by a user [1], this condition may +happen to hold, in apparently correct functioning, for I/O with a +CD/DVD. + +To address this issue, this commit turns the above BUG_ON into a +WARN_ON. This commit also adds a companion WARN_ON on request +insertion into the scheduler. + +[1] https://groups.google.com/d/msg/bfq-iosched/DDOTJBroBa4/VyU1zUFtCgAJ + +Reported-by: Alexandre Frade +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 0c09609a6099..0fc757ae7a42 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -1540,6 +1540,8 @@ static void bfq_add_request(struct request *rq) + + BUG_ON(!RQ_BFQQ(rq)); + BUG_ON(RQ_BFQQ(rq) != bfqq); ++ WARN_ON(blk_rq_sectors(rq) == 0); ++ + elv_rb_add(&bfqq->sort_list, rq); + + /* +@@ -4962,7 +4964,7 @@ static void bfq_finish_request(struct request *rq) + rq_io_start_time_ns(rq), + rq->cmd_flags); + +- BUG_ON(blk_rq_sectors(rq) == 0 && !(rq->rq_flags & RQF_STARTED)); ++ WARN_ON(blk_rq_sectors(rq) == 0 && !(rq->rq_flags & RQF_STARTED)); + + if (likely(rq->rq_flags & RQF_STARTED)) { + unsigned long flags; + +From 1097d368a20456c88acd75b3184c68df38e8f7b8 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Sun, 12 Nov 2017 22:43:46 +0100 +Subject: [PATCH 02/23] block, bfq-sq, bfq-mq: consider also past I/O in soft + real-time detection + +BFQ privileges the I/O of soft real-time applications, such as video +players, to guarantee to these application a high bandwidth and a low +latency. In this respect, it is not easy to correctly detect when an +application is soft real-time. A particularly nasty false positive is +that of an I/O-bound application that occasionally happens to meet all +requirements to be deemed as soft real-time. After being detected as +soft real-time, such an application monopolizes the device. Fortunately, +BFQ will realize soon that the application is actually not soft +real-time and suspend every privilege. Yet, the application may happen +again to be wrongly detected as soft real-time, and so on. + +As highlighted by our tests, this problem causes BFQ to occasionally +fail to guarantee a high responsiveness, in the presence of heavy +background I/O workloads. The reason is that the background workload +happens to be detected as soft real-time, more or less frequently, +during the execution of the interactive task under test. To give an +idea, because of this problem, Libreoffice Writer occasionally takes 8 +seconds, instead of 3, to start up, if there are sequential reads and +writes in the background, on a Kingston SSDNow V300. + +This commit addresses this issue by leveraging the following facts. + +The reason why some applications are detected as soft real-time despite +all BFQ checks to avoid false positives, is simply that, during high +CPU or storage-device load, I/O-bound applications may happen to do +I/O slowly enough to meet all soft real-time requirements, and pass +all BFQ extra checks. Yet, this happens only for limited time periods: +slow-speed time intervals are usually interspersed between other time +intervals during which these applications do I/O at a very high speed. +To exploit these facts, this commit introduces a little change, in the +detection of soft real-time behavior, to systematically consider also +the recent past: the higher the speed was in the recent past, the +later next I/O should arrive for the application to be considered as +soft real-time. At the beginning of a slow-speed interval, the minimum +arrival time allowed for the next I/O usually happens to still be so +high, to fall *after* the end of the slow-speed period itself. As a +consequence, the application does not risk to be deemed as soft +real-time during the slow-speed interval. Then, during the next +high-speed interval, the application cannot, evidently, be deemed as +soft real-time (exactly because of its speed), and so on. + +This extra filtering proved to be rather effective: in the above test, +the frequency of false positives became so low that the start-up time +was 3 seconds in all iterations (apart from occasional outliers, +caused by page-cache-management issues, which are out of the scope of +this commit, and cannot be solved by an I/O scheduler). + +Signed-off-by: Paolo Valente +Signed-off-by: Angelo Ruocco +--- + block/bfq-mq-iosched.c | 115 ++++++++++++++++++++++++++++++++++--------------- + block/bfq-sq-iosched.c | 115 ++++++++++++++++++++++++++++++++++--------------- + 2 files changed, 162 insertions(+), 68 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 0fc757ae7a42..4d06d900f45e 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -3201,37 +3201,78 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * whereas soft_rt_next_start is set to infinity for applications that do + * not. + * +- * Unfortunately, even a greedy application may happen to behave in an +- * isochronous way if the CPU load is high. In fact, the application may +- * stop issuing requests while the CPUs are busy serving other processes, +- * then restart, then stop again for a while, and so on. In addition, if +- * the disk achieves a low enough throughput with the request pattern +- * issued by the application (e.g., because the request pattern is random +- * and/or the device is slow), then the application may meet the above +- * bandwidth requirement too. To prevent such a greedy application to be +- * deemed as soft real-time, a further rule is used in the computation of +- * soft_rt_next_start: soft_rt_next_start must be higher than the current +- * time plus the maximum time for which the arrival of a request is waited +- * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. +- * This filters out greedy applications, as the latter issue instead their +- * next request as soon as possible after the last one has been completed +- * (in contrast, when a batch of requests is completed, a soft real-time +- * application spends some time processing data). ++ * Unfortunately, even a greedy (i.e., I/O-bound) application may ++ * happen to meet, occasionally or systematically, both the above ++ * bandwidth and isochrony requirements. This may happen at least in ++ * the following circumstances. First, if the CPU load is high. The ++ * application may stop issuing requests while the CPUs are busy ++ * serving other processes, then restart, then stop again for a while, ++ * and so on. The other circumstances are related to the storage ++ * device: the storage device is highly loaded or reaches a low-enough ++ * throughput with the I/O of the application (e.g., because the I/O ++ * is random and/or the device is slow). In all these cases, the ++ * I/O of the application may be simply slowed down enough to meet ++ * the bandwidth and isochrony requirements. To reduce the probability ++ * that greedy applications are deemed as soft real-time in these ++ * corner cases, a further rule is used in the computation of ++ * soft_rt_next_start: the return value of this function is forced to ++ * be higher than the maximum between the following two quantities. + * +- * Unfortunately, the last filter may easily generate false positives if +- * only bfqd->bfq_slice_idle is used as a reference time interval and one +- * or both the following cases occur: +- * 1) HZ is so low that the duration of a jiffy is comparable to or higher +- * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with +- * HZ=100. ++ * (a) Current time plus: (1) the maximum time for which the arrival ++ * of a request is waited for when a sync queue becomes idle, ++ * namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We ++ * postpone for a moment the reason for adding a few extra ++ * jiffies; we get back to it after next item (b). Lower-bounding ++ * the return value of this function with the current time plus ++ * bfqd->bfq_slice_idle tends to filter out greedy applications, ++ * because the latter issue their next request as soon as possible ++ * after the last one has been completed. In contrast, a soft ++ * real-time application spends some time processing data, after a ++ * batch of its requests has been completed. ++ * ++ * (b) Current value of bfqq->soft_rt_next_start. As pointed out ++ * above, greedy applications may happen to meet both the ++ * bandwidth and isochrony requirements under heavy CPU or ++ * storage-device load. In more detail, in these scenarios, these ++ * applications happen, only for limited time periods, to do I/O ++ * slowly enough to meet all the requirements described so far, ++ * including the filtering in above item (a). These slow-speed ++ * time intervals are usually interspersed between other time ++ * intervals during which these applications do I/O at a very high ++ * speed. Fortunately, exactly because of the high speed of the ++ * I/O in the high-speed intervals, the values returned by this ++ * function happen to be so high, near the end of any such ++ * high-speed interval, to be likely to fall *after* the end of ++ * the low-speed time interval that follows. These high values are ++ * stored in bfqq->soft_rt_next_start after each invocation of ++ * this function. As a consequence, if the last value of ++ * bfqq->soft_rt_next_start is constantly used to lower-bound the ++ * next value that this function may return, then, from the very ++ * beginning of a low-speed interval, bfqq->soft_rt_next_start is ++ * likely to be constantly kept so high that any I/O request ++ * issued during the low-speed interval is considered as arriving ++ * to soon for the application to be deemed as soft ++ * real-time. Then, in the high-speed interval that follows, the ++ * application will not be deemed as soft real-time, just because ++ * it will do I/O at a high speed. And so on. ++ * ++ * Getting back to the filtering in item (a), in the following two ++ * cases this filtering might be easily passed by a greedy ++ * application, if the reference quantity was just ++ * bfqd->bfq_slice_idle: ++ * 1) HZ is so low that the duration of a jiffy is comparable to or ++ * higher than bfqd->bfq_slice_idle. This happens, e.g., on slow ++ * devices with HZ=100. The time granularity may be so coarse ++ * that the approximation, in jiffies, of bfqd->bfq_slice_idle ++ * is rather lower than the exact value. + * 2) jiffies, instead of increasing at a constant rate, may stop increasing + * for a while, then suddenly 'jump' by several units to recover the lost + * increments. This seems to happen, e.g., inside virtual machines. +- * To address this issue, we do not use as a reference time interval just +- * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In +- * particular we add the minimum number of jiffies for which the filter +- * seems to be quite precise also in embedded systems and KVM/QEMU virtual +- * machines. ++ * To address this issue, in the filtering in (a) we do not use as a ++ * reference time interval just bfqd->bfq_slice_idle, but ++ * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the ++ * minimum number of jiffies for which the filter seems to be quite ++ * precise also in embedded systems and KVM/QEMU virtual machines. + */ + static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +@@ -3243,10 +3284,11 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + jiffies_to_msecs(HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate)); + +- return max(bfqq->last_idle_bklogged + +- HZ * bfqq->service_from_backlogged / +- bfqd->bfq_wr_max_softrt_rate, +- jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); ++ return max3(bfqq->soft_rt_next_start, ++ bfqq->last_idle_bklogged + ++ HZ * bfqq->service_from_backlogged / ++ bfqd->bfq_wr_max_softrt_rate, ++ jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); + } + + /** +@@ -4395,10 +4437,15 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bfqq->split_time = bfq_smallest_from_now(); + + /* +- * Set to the value for which bfqq will not be deemed as +- * soft rt when it becomes backlogged. ++ * To not forget the possibly high bandwidth consumed by a ++ * process/queue in the recent past, ++ * bfq_bfqq_softrt_next_start() returns a value at least equal ++ * to the current value of bfqq->soft_rt_next_start (see ++ * comments on bfq_bfqq_softrt_next_start). Set ++ * soft_rt_next_start to now, to mean that bfqq has consumed ++ * no bandwidth so far. + */ +- bfqq->soft_rt_next_start = bfq_greatest_from_now(); ++ bfqq->soft_rt_next_start = jiffies; + + /* first request is almost certainly seeky */ + bfqq->seek_history = 1; +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index 4bbd7f4c0154..987dc255c82c 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -3089,37 +3089,78 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * whereas soft_rt_next_start is set to infinity for applications that do + * not. + * +- * Unfortunately, even a greedy application may happen to behave in an +- * isochronous way if the CPU load is high. In fact, the application may +- * stop issuing requests while the CPUs are busy serving other processes, +- * then restart, then stop again for a while, and so on. In addition, if +- * the disk achieves a low enough throughput with the request pattern +- * issued by the application (e.g., because the request pattern is random +- * and/or the device is slow), then the application may meet the above +- * bandwidth requirement too. To prevent such a greedy application to be +- * deemed as soft real-time, a further rule is used in the computation of +- * soft_rt_next_start: soft_rt_next_start must be higher than the current +- * time plus the maximum time for which the arrival of a request is waited +- * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. +- * This filters out greedy applications, as the latter issue instead their +- * next request as soon as possible after the last one has been completed +- * (in contrast, when a batch of requests is completed, a soft real-time +- * application spends some time processing data). ++ * Unfortunately, even a greedy (i.e., I/O-bound) application may ++ * happen to meet, occasionally or systematically, both the above ++ * bandwidth and isochrony requirements. This may happen at least in ++ * the following circumstances. First, if the CPU load is high. The ++ * application may stop issuing requests while the CPUs are busy ++ * serving other processes, then restart, then stop again for a while, ++ * and so on. The other circumstances are related to the storage ++ * device: the storage device is highly loaded or reaches a low-enough ++ * throughput with the I/O of the application (e.g., because the I/O ++ * is random and/or the device is slow). In all these cases, the ++ * I/O of the application may be simply slowed down enough to meet ++ * the bandwidth and isochrony requirements. To reduce the probability ++ * that greedy applications are deemed as soft real-time in these ++ * corner cases, a further rule is used in the computation of ++ * soft_rt_next_start: the return value of this function is forced to ++ * be higher than the maximum between the following two quantities. + * +- * Unfortunately, the last filter may easily generate false positives if +- * only bfqd->bfq_slice_idle is used as a reference time interval and one +- * or both the following cases occur: +- * 1) HZ is so low that the duration of a jiffy is comparable to or higher +- * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with +- * HZ=100. ++ * (a) Current time plus: (1) the maximum time for which the arrival ++ * of a request is waited for when a sync queue becomes idle, ++ * namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We ++ * postpone for a moment the reason for adding a few extra ++ * jiffies; we get back to it after next item (b). Lower-bounding ++ * the return value of this function with the current time plus ++ * bfqd->bfq_slice_idle tends to filter out greedy applications, ++ * because the latter issue their next request as soon as possible ++ * after the last one has been completed. In contrast, a soft ++ * real-time application spends some time processing data, after a ++ * batch of its requests has been completed. ++ * ++ * (b) Current value of bfqq->soft_rt_next_start. As pointed out ++ * above, greedy applications may happen to meet both the ++ * bandwidth and isochrony requirements under heavy CPU or ++ * storage-device load. In more detail, in these scenarios, these ++ * applications happen, only for limited time periods, to do I/O ++ * slowly enough to meet all the requirements described so far, ++ * including the filtering in above item (a). These slow-speed ++ * time intervals are usually interspersed between other time ++ * intervals during which these applications do I/O at a very high ++ * speed. Fortunately, exactly because of the high speed of the ++ * I/O in the high-speed intervals, the values returned by this ++ * function happen to be so high, near the end of any such ++ * high-speed interval, to be likely to fall *after* the end of ++ * the low-speed time interval that follows. These high values are ++ * stored in bfqq->soft_rt_next_start after each invocation of ++ * this function. As a consequence, if the last value of ++ * bfqq->soft_rt_next_start is constantly used to lower-bound the ++ * next value that this function may return, then, from the very ++ * beginning of a low-speed interval, bfqq->soft_rt_next_start is ++ * likely to be constantly kept so high that any I/O request ++ * issued during the low-speed interval is considered as arriving ++ * to soon for the application to be deemed as soft ++ * real-time. Then, in the high-speed interval that follows, the ++ * application will not be deemed as soft real-time, just because ++ * it will do I/O at a high speed. And so on. ++ * ++ * Getting back to the filtering in item (a), in the following two ++ * cases this filtering might be easily passed by a greedy ++ * application, if the reference quantity was just ++ * bfqd->bfq_slice_idle: ++ * 1) HZ is so low that the duration of a jiffy is comparable to or ++ * higher than bfqd->bfq_slice_idle. This happens, e.g., on slow ++ * devices with HZ=100. The time granularity may be so coarse ++ * that the approximation, in jiffies, of bfqd->bfq_slice_idle ++ * is rather lower than the exact value. + * 2) jiffies, instead of increasing at a constant rate, may stop increasing + * for a while, then suddenly 'jump' by several units to recover the lost + * increments. This seems to happen, e.g., inside virtual machines. +- * To address this issue, we do not use as a reference time interval just +- * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In +- * particular we add the minimum number of jiffies for which the filter +- * seems to be quite precise also in embedded systems and KVM/QEMU virtual +- * machines. ++ * To address this issue, in the filtering in (a) we do not use as a ++ * reference time interval just bfqd->bfq_slice_idle, but ++ * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the ++ * minimum number of jiffies for which the filter seems to be quite ++ * precise also in embedded systems and KVM/QEMU virtual machines. + */ + static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +@@ -3131,10 +3172,11 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + jiffies_to_msecs(HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate)); + +- return max(bfqq->last_idle_bklogged + +- HZ * bfqq->service_from_backlogged / +- bfqd->bfq_wr_max_softrt_rate, +- jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); ++ return max3(bfqq->soft_rt_next_start, ++ bfqq->last_idle_bklogged + ++ HZ * bfqq->service_from_backlogged / ++ bfqd->bfq_wr_max_softrt_rate, ++ jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); + } + + /** +@@ -4167,10 +4209,15 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bfqq->split_time = bfq_smallest_from_now(); + + /* +- * Set to the value for which bfqq will not be deemed as +- * soft rt when it becomes backlogged. ++ * To not forget the possibly high bandwidth consumed by a ++ * process/queue in the recent past, ++ * bfq_bfqq_softrt_next_start() returns a value at least equal ++ * to the current value of bfqq->soft_rt_next_start (see ++ * comments on bfq_bfqq_softrt_next_start). Set ++ * soft_rt_next_start to now, to mean that bfqq has consumed ++ * no bandwidth so far. + */ +- bfqq->soft_rt_next_start = bfq_greatest_from_now(); ++ bfqq->soft_rt_next_start = jiffies; + + /* first request is almost certainly seeky */ + bfqq->seek_history = 1; + +From 2a09b505660c81dbb80a5d68c9bc558c326d041f Mon Sep 17 00:00:00 2001 +From: Chiara Bruschi +Date: Thu, 7 Dec 2017 09:57:19 +0100 +Subject: [PATCH 03/23] block, bfq-mq: fix occurrences of request + prepare/finish methods' old names + +Commits 'b01f1fa3bb19' (Port of "blk-mq-sched: unify request prepare +methods") and 'cc10d2d7d2c1' (Port of "blk-mq-sched: unify request +finished methods") changed the old names of current bfq_prepare_request +and bfq_finish_request methods, but left them unchanged elsewhere in +the code (related comments, part of function name bfq_put_rq_priv_body). + +This commit fixes every occurrence of the old names of these methods +by changing them into the current names. + +Fixes: b01f1fa3bb19 (Port of "blk-mq-sched: unify request prepare methods") +Fixes: cc10d2d7d2c1 (Port of "blk-mq-sched: unify request finished methods") +Reviewed-by: Paolo Valente +Signed-off-by: Federico Motta +Signed-off-by: Chiara Bruschi +--- + block/bfq-mq-iosched.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 4d06d900f45e..8f8d5eccb016 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -4018,20 +4018,20 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + /* + * TESTING: reset DISP_LIST flag, because: 1) + * this rq this request has passed through +- * get_rq_private, 2) then it will have +- * put_rq_private invoked on it, and 3) in +- * put_rq_private we use this flag to check +- * that put_rq_private is not invoked on +- * requests for which get_rq_private has been +- * invoked. ++ * bfq_prepare_request, 2) then it will have ++ * bfq_finish_request invoked on it, and 3) in ++ * bfq_finish_request we use this flag to check ++ * that bfq_finish_request is not invoked on ++ * requests for which bfq_prepare_request has ++ * been invoked. + */ + rq->rq_flags &= ~RQF_DISP_LIST; + goto inc_in_driver_start_rq; + } + + /* +- * We exploit the put_rq_private hook to decrement +- * rq_in_driver, but put_rq_private will not be ++ * We exploit the bfq_finish_request hook to decrement ++ * rq_in_driver, but bfq_finish_request will not be + * invoked on this request. So, to avoid unbalance, + * just start this request, without incrementing + * rq_in_driver. As a negative consequence, +@@ -4040,14 +4040,14 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + * bfq_schedule_dispatch to be invoked uselessly. + * + * As for implementing an exact solution, the +- * put_request hook, if defined, is probably invoked +- * also on this request. So, by exploiting this hook, +- * we could 1) increment rq_in_driver here, and 2) +- * decrement it in put_request. Such a solution would +- * let the value of the counter be always accurate, +- * but it would entail using an extra interface +- * function. This cost seems higher than the benefit, +- * being the frequency of non-elevator-private ++ * bfq_finish_request hook, if defined, is probably ++ * invoked also on this request. So, by exploiting ++ * this hook, we could 1) increment rq_in_driver here, ++ * and 2) decrement it in bfq_finish_request. Such a ++ * solution would let the value of the counter be ++ * always accurate, but it would entail using an extra ++ * interface function. This cost seems higher than the ++ * benefit, being the frequency of non-elevator-private + * requests very low. + */ + goto start_rq; +@@ -4963,7 +4963,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) + } + } + +-static void bfq_put_rq_priv_body(struct bfq_queue *bfqq) ++static void bfq_finish_request_body(struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "put_request_body: allocated %d", bfqq->allocated); +@@ -5019,7 +5019,7 @@ static void bfq_finish_request(struct request *rq) + spin_lock_irqsave(&bfqd->lock, flags); + + bfq_completed_request(bfqq, bfqd); +- bfq_put_rq_priv_body(bfqq); ++ bfq_finish_request_body(bfqq); + + spin_unlock_irqrestore(&bfqd->lock, flags); + } else { +@@ -5042,7 +5042,7 @@ static void bfq_finish_request(struct request *rq) + bfqg_stats_update_io_remove(bfqq_group(bfqq), + rq->cmd_flags); + } +- bfq_put_rq_priv_body(bfqq); ++ bfq_finish_request_body(bfqq); + } + + rq->elv.priv[0] = NULL; + +From 4df19943c3a767df453abea3d2ac3433c3326ce0 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Thu, 16 Nov 2017 18:38:13 +0100 +Subject: [PATCH 04/23] block, bfq-sq, bfq-mq: add missing rq_pos_tree update + on rq removal + +If two processes do I/O close to each other, then BFQ merges the +bfq_queues associated with these processes, to get a more sequential +I/O, and thus a higher throughput. In this respect, to detect whether +two processes are doing I/O close to each other, BFQ keeps a list of +the head-of-line I/O requests of all active bfq_queues. The list is +ordered by initial sectors, and implemented through a red-black tree +(rq_pos_tree). + +Unfortunately, the update of the rq_pos_tree was incomplete, because +the tree was not updated on the removal of the head-of-line I/O +request of a bfq_queue, in case the queue did not remain empty. This +commit adds the missing update. + +Signed-off-by: Paolo Valente +Signed-off-by: Angelo Ruocco +--- + block/bfq-mq-iosched.c | 3 +++ + block/bfq-sq-iosched.c | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 8f8d5eccb016..603191c9008f 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -1729,6 +1729,9 @@ static void bfq_remove_request(struct request_queue *q, + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } ++ } else { ++ BUG_ON(!bfqq->next_rq); ++ bfq_pos_tree_add_move(bfqd, bfqq); + } + + if (rq->cmd_flags & REQ_META) { +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index 987dc255c82c..ea90ace79e49 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -1669,6 +1669,9 @@ static void bfq_remove_request(struct request *rq) + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } ++ } else { ++ BUG_ON(!bfqq->next_rq); ++ bfq_pos_tree_add_move(bfqd, bfqq); + } + + if (rq->cmd_flags & REQ_META) { + +From b844e345140aaea957d84a21d2aa67588b020cd5 Mon Sep 17 00:00:00 2001 +From: Angelo Ruocco +Date: Mon, 18 Dec 2017 08:28:08 +0100 +Subject: [PATCH 05/23] block, bfq-sq, bfq-mq: check low_latency flag in + bfq_bfqq_save_state() + +A just-created bfq_queue will certainly be deemed as interactive on +the arrival of its first I/O request, if the low_latency flag is +set. Yet, if the queue is merged with another queue on the arrival of +its first I/O request, it will not have the chance to be flagged as +interactive. Nevertheless, if the queue is then split soon enough, it +has to be flagged as interactive after the split. + +To handle this early-merge scenario correctly, BFQ saves the state of +the queue, on the merge, as if the latter had already been deemed +interactive. So, if the queue is split soon, it will get +weight-raised, because the previous state of the queue is resumed on +the split. + +Unfortunately, in the act of saving the state of the newly-created +queue, BFQ doesn't check whether the low_latency flag is set, and this +causes early-merged queues to be then weight-raised, on queue splits, +even if low_latency is off. This commit addresses this problem by +adding the missing check. + +Signed-off-by: Angelo Ruocco +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 3 ++- + block/bfq-sq-iosched.c | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 603191c9008f..ff9776c8836a 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -2231,7 +2231,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) + bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); + bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); + if (unlikely(bfq_bfqq_just_created(bfqq) && +- !bfq_bfqq_in_large_burst(bfqq))) { ++ !bfq_bfqq_in_large_burst(bfqq) && ++ bfqq->bfqd->low_latency)) { + /* + * bfqq being merged ritgh after being created: bfqq + * would have deserved interactive weight raising, but +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index ea90ace79e49..3a2d764e760c 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -2109,7 +2109,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) + bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); + bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); + if (unlikely(bfq_bfqq_just_created(bfqq) && +- !bfq_bfqq_in_large_burst(bfqq))) { ++ !bfq_bfqq_in_large_burst(bfqq) && ++ bfqq->bfqd->low_latency)) { + /* + * bfqq being merged ritgh after being created: bfqq + * would have deserved interactive weight raising, but + +From 4cc6896fe1de2e0b4de151a6e70658f10b9ec2fa Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Fri, 27 Oct 2017 11:12:14 +0200 +Subject: [PATCH 06/23] block, bfq-sq, bfq-mq: let a queue be merged only + shortly after starting I/O + +In BFQ and CFQ, two processes are said to be cooperating if they do +I/O in such a way that the union of their I/O requests yields a +sequential I/O pattern. To get such a sequential I/O pattern out of +the non-sequential pattern of each cooperating process, BFQ and CFQ +merge the queues associated with these processes. In more detail, +cooperating processes, and thus their associated queues, usually +start, or restart, to do I/O shortly after each other. This is the +case, e.g., for the I/O threads of KVM/QEMU and of the dump +utility. Basing on this assumption, this commit allows a bfq_queue to +be merged only during a short time interval (100ms) after it starts, +or re-starts, to do I/O. This filtering provides two important +benefits. + +First, it greatly reduces the probability that two non-cooperating +processes have their queues merged by mistake, if they just happen to +do I/O close to each other for a short time interval. These spurious +merges cause loss of service guarantees. A low-weight bfq_queue may +unjustly get more than its expected share of the throughput: if such a +low-weight queue is merged with a high-weight queue, then the I/O for +the low-weight queue is served as if the queue had a high weight. This +may damage other high-weight queues unexpectedly. For instance, +because of this issue, lxterminal occasionally took 7.5 seconds to +start, instead of 6.5 seconds, when some sequential readers and +writers did I/O in the background on a FUJITSU MHX2300BT HDD. The +reason is that the bfq_queues associated with some of the readers or +the writers were merged with the high-weight queues of some processes +that had to do some urgent but little I/O. The readers then exploited +the inherited high weight for all or most of their I/O, during the +start-up of terminal. The filtering introduced by this commit +eliminated any outlier caused by spurious queue merges in our start-up +time tests. + +This filtering also provides a little boost of the throughput +sustainable by BFQ: 3-4%, depending on the CPU. The reason is that, +once a bfq_queue cannot be merged any longer, this commit makes BFQ +stop updating the data needed to handle merging for the queue. + +Signed-off-by: Paolo Valente +Signed-off-by: Angelo Ruocco +--- + block/bfq-mq-iosched.c | 64 +++++++++++++++++++++++++++++++++++++++++--------- + block/bfq-mq.h | 1 + + block/bfq-sched.c | 4 ++++ + block/bfq-sq-iosched.c | 64 +++++++++++++++++++++++++++++++++++++++++--------- + block/bfq.h | 2 ++ + 5 files changed, 113 insertions(+), 22 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index ff9776c8836a..8b17b25a3c30 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -119,6 +119,20 @@ static const int bfq_async_charge_factor = 10; + /* Default timeout values, in jiffies, approximating CFQ defaults. */ + static const int bfq_timeout = (HZ / 8); + ++/* ++ * Time limit for merging (see comments in bfq_setup_cooperator). Set ++ * to the slowest value that, in our tests, proved to be effective in ++ * removing false positives, while not causing true positives to miss ++ * queue merging. ++ * ++ * As can be deduced from the low time limit below, queue merging, if ++ * successful, happens at the very beggining of the I/O of the involved ++ * cooperating processes, as a consequence of the arrival of the very ++ * first requests from each cooperator. After that, there is very ++ * little chance to find cooperators. ++ */ ++static const unsigned long bfq_merge_time_limit = HZ/10; ++ + static struct kmem_cache *bfq_pool; + + /* Below this threshold (in ns), we consider thinktime immediate. */ +@@ -389,6 +403,13 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + return bfqq; + } + ++static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) ++{ ++ return bfqq->service_from_backlogged > 0 && ++ time_is_before_jiffies(bfqq->first_IO_time + ++ bfq_merge_time_limit); ++} ++ + static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) + { + struct rb_node **p, *parent; +@@ -399,6 +420,14 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfqq->pos_root = NULL; + } + ++ /* ++ * bfqq cannot be merged any longer (see comments in ++ * bfq_setup_cooperator): no point in adding bfqq into the ++ * position tree. ++ */ ++ if (bfq_too_late_for_merging(bfqq)) ++ return; ++ + if (bfq_class_idle(bfqq)) + return; + if (!bfqq->next_rq) +@@ -2081,6 +2110,13 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) + static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, + struct bfq_queue *new_bfqq) + { ++ if (bfq_too_late_for_merging(new_bfqq)) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "[%s] too late for bfq%d to be merged", ++ __func__, new_bfqq->pid); ++ return false; ++ } ++ + if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || + (bfqq->ioprio_class != new_bfqq->ioprio_class)) + return false; +@@ -2149,6 +2185,23 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + { + struct bfq_queue *in_service_bfqq, *new_bfqq; + ++ /* ++ * Prevent bfqq from being merged if it has been created too ++ * long ago. The idea is that true cooperating processes, and ++ * thus their associated bfq_queues, are supposed to be ++ * created shortly after each other. This is the case, e.g., ++ * for KVM/QEMU and dump I/O threads. Basing on this ++ * assumption, the following filtering greatly reduces the ++ * probability that two non-cooperating processes, which just ++ * happen to do close I/O for some short time interval, have ++ * their queues merged by mistake. ++ */ ++ if (bfq_too_late_for_merging(bfqq)) { ++ bfq_log_bfqq(bfqd, bfqq, ++ "would have looked for coop, but too late"); ++ return NULL; ++ } ++ + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + +@@ -3338,17 +3391,6 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, + */ + slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); + +- /* +- * Increase service_from_backlogged before next statement, +- * because the possible next invocation of +- * bfq_bfqq_charge_time would likely inflate +- * entity->service. In contrast, service_from_backlogged must +- * contain real service, to enable the soft real-time +- * heuristic to correctly compute the bandwidth consumed by +- * bfqq. +- */ +- bfqq->service_from_backlogged += entity->service; +- + /* + * As above explained, charge slow (typically seeky) and + * timed-out queues with the time and not the service +diff --git a/block/bfq-mq.h b/block/bfq-mq.h +index 1cb05bb853d2..a5947b203ef2 100644 +--- a/block/bfq-mq.h ++++ b/block/bfq-mq.h +@@ -337,6 +337,7 @@ struct bfq_queue { + unsigned long wr_start_at_switch_to_srt; + + unsigned long split_time; /* time of last split */ ++ unsigned long first_IO_time; /* time of first I/O for this queue */ + }; + + /** +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 616c0692335a..9d261dd428e4 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -939,6 +939,10 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st; + ++ if (!bfqq->service_from_backlogged) ++ bfqq->first_IO_time = jiffies; ++ ++ bfqq->service_from_backlogged += served; + for_each_entity(entity) { + st = bfq_entity_service_tree(entity); + +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index 3a2d764e760c..cd00a41ca35d 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -113,6 +113,20 @@ static const int bfq_async_charge_factor = 10; + /* Default timeout values, in jiffies, approximating CFQ defaults. */ + static const int bfq_timeout = (HZ / 8); + ++/* ++ * Time limit for merging (see comments in bfq_setup_cooperator). Set ++ * to the slowest value that, in our tests, proved to be effective in ++ * removing false positives, while not causing true positives to miss ++ * queue merging. ++ * ++ * As can be deduced from the low time limit below, queue merging, if ++ * successful, happens at the very beggining of the I/O of the involved ++ * cooperating processes, as a consequence of the arrival of the very ++ * first requests from each cooperator. After that, there is very ++ * little chance to find cooperators. ++ */ ++static const unsigned long bfq_merge_time_limit = HZ/10; ++ + static struct kmem_cache *bfq_pool; + + /* Below this threshold (in ns), we consider thinktime immediate. */ +@@ -351,6 +365,13 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + return bfqq; + } + ++static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) ++{ ++ return bfqq->service_from_backlogged > 0 && ++ time_is_before_jiffies(bfqq->first_IO_time + ++ bfq_merge_time_limit); ++} ++ + static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) + { + struct rb_node **p, *parent; +@@ -361,6 +382,14 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfqq->pos_root = NULL; + } + ++ /* ++ * bfqq cannot be merged any longer (see comments in ++ * bfq_setup_cooperator): no point in adding bfqq into the ++ * position tree. ++ */ ++ if (bfq_too_late_for_merging(bfqq)) ++ return; ++ + if (bfq_class_idle(bfqq)) + return; + if (!bfqq->next_rq) +@@ -1960,6 +1989,13 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) + static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, + struct bfq_queue *new_bfqq) + { ++ if (bfq_too_late_for_merging(new_bfqq)) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "[%s] too late for bfq%d to be merged", ++ __func__, new_bfqq->pid); ++ return false; ++ } ++ + if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || + (bfqq->ioprio_class != new_bfqq->ioprio_class)) + return false; +@@ -2028,6 +2064,23 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + { + struct bfq_queue *in_service_bfqq, *new_bfqq; + ++ /* ++ * Prevent bfqq from being merged if it has been created too ++ * long ago. The idea is that true cooperating processes, and ++ * thus their associated bfq_queues, are supposed to be ++ * created shortly after each other. This is the case, e.g., ++ * for KVM/QEMU and dump I/O threads. Basing on this ++ * assumption, the following filtering greatly reduces the ++ * probability that two non-cooperating processes, which just ++ * happen to do close I/O for some short time interval, have ++ * their queues merged by mistake. ++ */ ++ if (bfq_too_late_for_merging(bfqq)) { ++ bfq_log_bfqq(bfqd, bfqq, ++ "would have looked for coop, but too late"); ++ return NULL; ++ } ++ + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + +@@ -3226,17 +3279,6 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, + */ + slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); + +- /* +- * Increase service_from_backlogged before next statement, +- * because the possible next invocation of +- * bfq_bfqq_charge_time would likely inflate +- * entity->service. In contrast, service_from_backlogged must +- * contain real service, to enable the soft real-time +- * heuristic to correctly compute the bandwidth consumed by +- * bfqq. +- */ +- bfqq->service_from_backlogged += entity->service; +- + /* + * As above explained, charge slow (typically seeky) and + * timed-out queues with the time and not the service +diff --git a/block/bfq.h b/block/bfq.h +index 47cd4d5a8c32..59539adc00a5 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -329,6 +329,8 @@ struct bfq_queue { + unsigned long wr_start_at_switch_to_srt; + + unsigned long split_time; /* time of last split */ ++ ++ unsigned long first_IO_time; /* time of first I/O for this queue */ + }; + + /** + +From 157f39c43ab182280634cd4f6335d0187b3741a0 Mon Sep 17 00:00:00 2001 +From: Angelo Ruocco +Date: Mon, 11 Dec 2017 14:19:54 +0100 +Subject: [PATCH 07/23] block, bfq-sq, bfq-mq: remove superfluous check in + queue-merging setup + +When two or more processes do I/O in a way that the their requests are +sequential in respect to one another, BFQ merges the bfq_queues associated +with the processes. This way the overall I/O pattern becomes sequential, +and thus there is a boost in througput. +These cooperating processes usually start or restart to do I/O shortly +after each other. So, in order to avoid merging non-cooperating processes, +BFQ ensures that none of these queues has been in weight raising for too +long. + +In this respect, from commit "block, bfq-sq, bfq-mq: let a queue be merged +only shortly after being created", BFQ checks whether any queue (and not +only weight-raised ones) is doing I/O continuously from too long to be +merged. + +This new additional check makes the first one useless: a queue doing +I/O from long enough, if being weight-raised, is also a queue in +weight raising for too long to be merged. Accordingly, this commit +removes the first check. + +Signed-off-by: Angelo Ruocco +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 53 ++++---------------------------------------------- + block/bfq-sq-iosched.c | 53 ++++---------------------------------------------- + 2 files changed, 8 insertions(+), 98 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 8b17b25a3c30..f5db8613a70f 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -2140,20 +2140,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, + return true; + } + +-/* +- * If this function returns true, then bfqq cannot be merged. The idea +- * is that true cooperation happens very early after processes start +- * to do I/O. Usually, late cooperations are just accidental false +- * positives. In case bfqq is weight-raised, such false positives +- * would evidently degrade latency guarantees for bfqq. +- */ +-static bool wr_from_too_long(struct bfq_queue *bfqq) +-{ +- return bfqq->wr_coeff > 1 && +- time_is_before_jiffies(bfqq->last_wr_start_finish + +- msecs_to_jiffies(100)); +-} +- + /* + * Attempt to schedule a merge of bfqq with the currently in-service + * queue or with a close queue among the scheduled queues. Return +@@ -2167,11 +2153,6 @@ static bool wr_from_too_long(struct bfq_queue *bfqq) + * to maintain. Besides, in such a critical condition as an out of memory, + * the benefits of queue merging may be little relevant, or even negligible. + * +- * Weight-raised queues can be merged only if their weight-raising +- * period has just started. In fact cooperating processes are usually +- * started together. Thus, with this filter we avoid false positives +- * that would jeopardize low-latency guarantees. +- * + * WARNING: queue merging may impair fairness among non-weight raised + * queues, for at least two reasons: 1) the original weight of a + * merged queue may change during the merged state, 2) even being the +@@ -2205,15 +2186,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + +- if (io_struct && wr_from_too_long(bfqq) && +- likely(bfqq != &bfqd->oom_bfqq)) +- bfq_log_bfqq(bfqd, bfqq, +- "would have looked for coop, but bfq%d wr", +- bfqq->pid); +- +- if (!io_struct || +- wr_from_too_long(bfqq) || +- unlikely(bfqq == &bfqd->oom_bfqq)) ++ if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) + return NULL; + + /* If there is only one backlogged queue, don't search. */ +@@ -2223,17 +2196,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + in_service_bfqq = bfqd->in_service_queue; + + if (in_service_bfqq && in_service_bfqq != bfqq && +- wr_from_too_long(in_service_bfqq) +- && likely(in_service_bfqq == &bfqd->oom_bfqq)) +- bfq_log_bfqq(bfqd, bfqq, +- "would have tried merge with in-service-queue, but wr"); +- +- if (!in_service_bfqq || in_service_bfqq == bfqq +- || wr_from_too_long(in_service_bfqq) || +- unlikely(in_service_bfqq == &bfqd->oom_bfqq)) +- goto check_scheduled; +- +- if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && ++ likely(in_service_bfqq != &bfqd->oom_bfqq) && ++ bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + bfqq->entity.parent == in_service_bfqq->entity.parent && + bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) { + new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); +@@ -2245,21 +2209,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * queues. The only thing we need is that the bio/request is not + * NULL, as we need it to establish whether a cooperator exists. + */ +-check_scheduled: + new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, + bfq_io_struct_pos(io_struct, request)); + + BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); + +- if (new_bfqq && wr_from_too_long(new_bfqq) && +- likely(new_bfqq != &bfqd->oom_bfqq) && +- bfq_may_be_close_cooperator(bfqq, new_bfqq)) +- bfq_log_bfqq(bfqd, bfqq, +- "would have merged with bfq%d, but wr", +- new_bfqq->pid); +- +- if (new_bfqq && !wr_from_too_long(new_bfqq) && +- likely(new_bfqq != &bfqd->oom_bfqq) && ++ if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && + bfq_may_be_close_cooperator(bfqq, new_bfqq)) + return bfq_setup_merge(bfqq, new_bfqq); + +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index cd00a41ca35d..d8a358e5e284 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -2019,20 +2019,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, + return true; + } + +-/* +- * If this function returns true, then bfqq cannot be merged. The idea +- * is that true cooperation happens very early after processes start +- * to do I/O. Usually, late cooperations are just accidental false +- * positives. In case bfqq is weight-raised, such false positives +- * would evidently degrade latency guarantees for bfqq. +- */ +-static bool wr_from_too_long(struct bfq_queue *bfqq) +-{ +- return bfqq->wr_coeff > 1 && +- time_is_before_jiffies(bfqq->last_wr_start_finish + +- msecs_to_jiffies(100)); +-} +- + /* + * Attempt to schedule a merge of bfqq with the currently in-service + * queue or with a close queue among the scheduled queues. Return +@@ -2046,11 +2032,6 @@ static bool wr_from_too_long(struct bfq_queue *bfqq) + * to maintain. Besides, in such a critical condition as an out of memory, + * the benefits of queue merging may be little relevant, or even negligible. + * +- * Weight-raised queues can be merged only if their weight-raising +- * period has just started. In fact cooperating processes are usually +- * started together. Thus, with this filter we avoid false positives +- * that would jeopardize low-latency guarantees. +- * + * WARNING: queue merging may impair fairness among non-weight raised + * queues, for at least two reasons: 1) the original weight of a + * merged queue may change during the merged state, 2) even being the +@@ -2084,15 +2065,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + +- if (io_struct && wr_from_too_long(bfqq) && +- likely(bfqq != &bfqd->oom_bfqq)) +- bfq_log_bfqq(bfqd, bfqq, +- "would have looked for coop, but bfq%d wr", +- bfqq->pid); +- +- if (!io_struct || +- wr_from_too_long(bfqq) || +- unlikely(bfqq == &bfqd->oom_bfqq)) ++ if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) + return NULL; + + /* If there is only one backlogged queue, don't search. */ +@@ -2102,17 +2075,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + in_service_bfqq = bfqd->in_service_queue; + + if (in_service_bfqq && in_service_bfqq != bfqq && +- bfqd->in_service_bic && wr_from_too_long(in_service_bfqq) +- && likely(in_service_bfqq == &bfqd->oom_bfqq)) +- bfq_log_bfqq(bfqd, bfqq, +- "would have tried merge with in-service-queue, but wr"); +- +- if (!in_service_bfqq || in_service_bfqq == bfqq || +- !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) || +- unlikely(in_service_bfqq == &bfqd->oom_bfqq)) +- goto check_scheduled; +- +- if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && ++ likely(in_service_bfqq != &bfqd->oom_bfqq) && ++ bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + bfqq->entity.parent == in_service_bfqq->entity.parent && + bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) { + new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); +@@ -2124,21 +2088,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * queues. The only thing we need is that the bio/request is not + * NULL, as we need it to establish whether a cooperator exists. + */ +-check_scheduled: + new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, + bfq_io_struct_pos(io_struct, request)); + + BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); + +- if (new_bfqq && wr_from_too_long(new_bfqq) && +- likely(new_bfqq != &bfqd->oom_bfqq) && +- bfq_may_be_close_cooperator(bfqq, new_bfqq)) +- bfq_log_bfqq(bfqd, bfqq, +- "would have merged with bfq%d, but wr", +- new_bfqq->pid); +- +- if (new_bfqq && !wr_from_too_long(new_bfqq) && +- likely(new_bfqq != &bfqd->oom_bfqq) && ++ if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && + bfq_may_be_close_cooperator(bfqq, new_bfqq)) + return bfq_setup_merge(bfqq, new_bfqq); + + +From b82eb91d87f172aba7eb5eb98e8d5e2a621adf51 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Thu, 30 Nov 2017 17:48:28 +0100 +Subject: [PATCH 08/23] block, bfq-sq, bfq-mq: increase threshold to deem I/O + as random + +If two processes do I/O close to each other, i.e., are cooperating +processes in BFQ (and CFQ'S) nomenclature, then BFQ merges their +associated bfq_queues, so as to get sequential I/O from the union of +the I/O requests of the processes, and thus reach a higher +throughput. A merged queue is then split if its I/O stops being +sequential. In this respect, BFQ deems the I/O of a bfq_queue as +(mostly) sequential only if less than 4 I/O requests are random, out +of the last 32 requests inserted into the queue. + +Unfortunately, extensive testing (with the interleaved_io benchmark of +the S suite [1], and with real applications spawning cooperating +processes) has clearly shown that, with such a low threshold, only a +rather low I/O throughput may be reached when several cooperating +processes do I/O. In particular, the outcome of each test run was +bimodal: if queue merging occurred and was stable during the test, +then the throughput was close to the peak rate of the storage device, +otherwise the throughput was arbitrarily low (usually around 1/10 of +the peak rate with a rotational device). The probability to get the +unlucky outcomes grew with the number of cooperating processes: it was +already significant with 5 processes, and close to one with 7 or more +processes. + +The cause of the low throughput in the unlucky runs was that the +merged queues containing the I/O of these cooperating processes were +soon split, because they contained more random I/O requests than those +tolerated by the 4/32 threshold, but +- that I/O would have however allowed the storage device to reach + peak throughput or almost peak throughput; +- in contrast, the I/O of these processes, if served individually + (from separate queues) yielded a rather low throughput. + +So we repeated our tests with increasing values of the threshold, +until we found the minimum value (19) for which we obtained maximum +throughput, reliably, with at least up to 9 cooperating +processes. Then we checked that the use of that higher threshold value +did not cause any regression for any other benchmark in the suite [1]. +This commit raises the threshold to such a higher value. + +[1] https://github.com/Algodev-github/S + +Signed-off-by: Angelo Ruocco +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 2 +- + block/bfq-sq-iosched.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index f5db8613a70f..cb5f49ddecb6 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -145,7 +145,7 @@ static struct kmem_cache *bfq_pool; + #define BFQQ_SEEK_THR (sector_t)(8 * 100) + #define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) + #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) +-#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) ++#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) + + /* Min number of samples required to perform peak-rate update */ + #define BFQ_RATE_MIN_SAMPLES 32 +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index d8a358e5e284..e1c6dc651be1 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -139,7 +139,7 @@ static struct kmem_cache *bfq_pool; + #define BFQQ_SEEK_THR (sector_t)(8 * 100) + #define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) + #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) +-#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) ++#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) + + /* Min number of samples required to perform peak-rate update */ + #define BFQ_RATE_MIN_SAMPLES 32 + +From b739dda4e4b3a1cbbc905f86f9fbb0860b068ce7 Mon Sep 17 00:00:00 2001 +From: Chiara Bruschi +Date: Mon, 11 Dec 2017 18:55:26 +0100 +Subject: [PATCH 09/23] block, bfq-sq, bfq-mq: specify usage condition of + delta_us in bfq_log_bfqq call + +Inside the function bfq_completed_request the value of a variable +called delta_us is computed as current request completion time. +delta_us is used inside a call to the function bfq_log_bfqq as divisor +in a division operation to compute a rate value, but no check makes +sure that delta_us has non-zero value. A divisor with value 0 leads +to a division error that could result in a kernel oops (therefore +unstable/unreliable system state) and consequently cause kernel panic +if resources are unavailable after the system fault. + +This commit fixes this call to bfq_log_bfqq specifying the condition +that allows delta_us to be safely used as divisor. + +Signed-off-by: Paolo Valente +Signed-off-by: Chiara Bruschi +--- + block/bfq-mq-iosched.c | 5 ++++- + block/bfq-sq-iosched.c | 5 ++++- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index cb5f49ddecb6..6ce2c0789046 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -4904,9 +4904,12 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) + bfq_log_bfqq(bfqd, bfqq, + "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", + delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, ++ delta_us > 0 ? + (USEC_PER_SEC* + (u64)((bfqd->last_rq_max_size<>BFQ_RATE_SHIFT, ++ >>BFQ_RATE_SHIFT : ++ (USEC_PER_SEC* ++ (u64)(bfqd->last_rq_max_size<>BFQ_RATE_SHIFT, + (USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT); + + /* +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index e1c6dc651be1..eff4c4edf5a0 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -4565,9 +4565,12 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) + + bfq_log(bfqd, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", + delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, ++ delta_us > 0 ? + (USEC_PER_SEC* + (u64)((bfqd->last_rq_max_size<>BFQ_RATE_SHIFT, ++ >>BFQ_RATE_SHIFT : ++ (USEC_PER_SEC* ++ (u64)(bfqd->last_rq_max_size<>BFQ_RATE_SHIFT, + (USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT); + + /* + +From ae4310c13eca762644734d53074d8456c85e2dec Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Tue, 19 Dec 2017 12:07:12 +0100 +Subject: [PATCH 10/23] block, bfq-mq: limit tags for writes and async I/O + +Asynchronous I/O can easily starve synchronous I/O (both sync reads +and sync writes), by consuming all request tags. Similarly, storms of +synchronous writes, such as those that sync(2) may trigger, can starve +synchronous reads. In their turn, these two problems may also cause +BFQ to loose control on latency for interactive and soft real-time +applications. For example, on a PLEXTOR PX-256M5S SSD, LibreOffice +Writer takes 0.6 seconds to start if the device is idle, but it takes +more than 45 seconds (!) if there are sequential writes in the +background. + +This commit addresses this issue by limiting the maximum percentage of +tags that asynchronous I/O requests and synchronous write requests can +consume. In particular, this commit grants a higher threshold to +synchronous writes, to prevent the latter from being starved by +asynchronous I/O. + +According to the above test, LibreOffice Writer now starts in about +1.2 seconds on average, regardless of the background workload, and +apart from some rare outlier. To check this improvement, run, e.g., +sudo ./comm_startup_lat.sh bfq-mq 5 5 seq 10 "lowriter --terminate_after_init" +for the comm_startup_lat benchmark in the S suite [1]. + +[1] https://github.com/Algodev-github/S + +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++ + block/bfq-mq.h | 12 ++++++++ + 2 files changed, 89 insertions(+) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 6ce2c0789046..f384f5566672 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -362,6 +362,82 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, + } + } + ++/* ++ * See the comments on bfq_limit_depth for the purpose of ++ * the depths set in the function. ++ */ ++static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) ++{ ++ bfqd->sb_shift = bt->sb.shift; ++ ++ /* ++ * In-word depths if no bfq_queue is being weight-raised: ++ * leaving 25% of tags only for sync reads. ++ * ++ * In next formulas, right-shift the value ++ * (1U<sb_shift), instead of computing directly ++ * (1U<<(bfqd->sb_shift - something)), to be robust against ++ * any possible value of bfqd->sb_shift, without having to ++ * limit 'something'. ++ */ ++ /* no more than 50% of tags for async I/O */ ++ bfqd->word_depths[0][0] = max((1U<sb_shift)>>1, 1U); ++ /* ++ * no more than 75% of tags for sync writes (25% extra tags ++ * w.r.t. async I/O, to prevent async I/O from starving sync ++ * writes) ++ */ ++ bfqd->word_depths[0][1] = max(((1U<sb_shift) * 3)>>2, 1U); ++ ++ /* ++ * In-word depths in case some bfq_queue is being weight- ++ * raised: leaving ~63% of tags for sync reads. This is the ++ * highest percentage for which, in our tests, application ++ * start-up times didn't suffer from any regression due to tag ++ * shortage. ++ */ ++ /* no more than ~18% of tags for async I/O */ ++ bfqd->word_depths[1][0] = max(((1U<sb_shift) * 3)>>4, 1U); ++ /* no more than ~37% of tags for sync writes (~20% extra tags) */ ++ bfqd->word_depths[1][1] = max(((1U<sb_shift) * 6)>>4, 1U); ++} ++ ++/* ++ * Async I/O can easily starve sync I/O (both sync reads and sync ++ * writes), by consuming all tags. Similarly, storms of sync writes, ++ * such as those that sync(2) may trigger, can starve sync reads. ++ * Limit depths of async I/O and sync writes so as to counter both ++ * problems. ++ */ ++static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) ++{ ++ struct blk_mq_tags *tags = blk_mq_tags_from_data(data); ++ struct bfq_data *bfqd = data->q->elevator->elevator_data; ++ struct sbitmap_queue *bt; ++ ++ if (op_is_sync(op) && !op_is_write(op)) ++ return; ++ ++ if (data->flags & BLK_MQ_REQ_RESERVED) { ++ if (unlikely(!tags->nr_reserved_tags)) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ bt = &tags->breserved_tags; ++ } else ++ bt = &tags->bitmap_tags; ++ ++ if (unlikely(bfqd->sb_shift != bt->sb.shift)) ++ bfq_update_depths(bfqd, bt); ++ ++ data->shallow_depth = ++ bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; ++ ++ bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", ++ __func__, bfqd->wr_busy_queues, op_is_sync(op), ++ data->shallow_depth); ++} ++ + static struct bfq_queue * + bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + sector_t sector, struct rb_node **ret_parent, +@@ -5812,6 +5888,7 @@ static struct elv_fs_entry bfq_attrs[] = { + + static struct elevator_type iosched_bfq_mq = { + .ops.mq = { ++ .limit_depth = bfq_limit_depth, + .prepare_request = bfq_prepare_request, + .finish_request = bfq_finish_request, + .exit_icq = bfq_exit_icq, +diff --git a/block/bfq-mq.h b/block/bfq-mq.h +index a5947b203ef2..458099ee0308 100644 +--- a/block/bfq-mq.h ++++ b/block/bfq-mq.h +@@ -619,6 +619,18 @@ struct bfq_data { + struct bfq_queue *bio_bfqq; + /* Extra flag used only for TESTING */ + bool bio_bfqq_set; ++ ++ /* ++ * Cached sbitmap shift, used to compute depth limits in ++ * bfq_update_depths. ++ */ ++ unsigned int sb_shift; ++ ++ /* ++ * Depth limits used in bfq_limit_depth (see comments on the ++ * function) ++ */ ++ unsigned int word_depths[2][2]; + }; + + enum bfqq_state_flags { + +From 402e5f6b59662d290ab2b3c10b0016207a63ad21 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Thu, 21 Dec 2017 15:51:39 +0100 +Subject: [PATCH 11/23] bfq-sq, bfq-mq: limit sectors served with interactive + weight raising + +To maximise responsiveness, BFQ raises the weight, and performs device +idling, for bfq_queues associated with processes deemed as +interactive. In particular, weight raising has a maximum duration, +equal to the time needed to start a large application. If a +weight-raised process goes on doing I/O beyond this maximum duration, +it loses weight-raising. + +This mechanism is evidently vulnerable to the following false +positives: I/O-bound applications that will go on doing I/O for much +longer than the duration of weight-raising. These applications have +basically no benefit from being weight-raised at the beginning of +their I/O. On the opposite end, while being weight-raised, these +applications +a) unjustly steal throughput to applications that may truly need +low latency; +b) make BFQ uselessly perform device idling; device idling results +in loss of device throughput with most flash-based storage, and may +increase latencies when used purposelessly. + +This commit adds a countermeasure to reduce both the above +problems. To introduce this countermeasure, we provide the following +extra piece of information (full details in the comments added by this +commit). During the start-up of the large application used as a +reference to set the duration of weight-raising, involved processes +transfer at most ~110K sectors each. Accordingly, a process initially +deemed as interactive has no right to be weight-raised any longer, +once transferred 110K sectors or more. + +Basing on this consideration, this commit early-ends weight-raising +for a bfq_queue if the latter happens to have received an amount of +service at least equal to 110K sectors (actually, a little bit more, +to keep a safety margin). I/O-bound applications that reach a high +throughput, such as file copy, get to this threshold much before the +allowed weight-raising period finishes. Thus this early ending of +weight-raising reduces the amount of time during which these +applications cause the problems described above. + +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 84 ++++++++++++++++++++++++++++++++++++++++++++------ + block/bfq-mq.h | 5 +++ + block/bfq-sched.c | 3 ++ + block/bfq-sq-iosched.c | 84 ++++++++++++++++++++++++++++++++++++++++++++------ + block/bfq.h | 5 +++ + 5 files changed, 163 insertions(+), 18 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index f384f5566672..63fdd16dec3c 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -162,15 +162,17 @@ static struct kmem_cache *bfq_pool; + * interactive applications automatically, using the following formula: + * duration = (R / r) * T, where r is the peak rate of the device, and + * R and T are two reference parameters. +- * In particular, R is the peak rate of the reference device (see below), +- * and T is a reference time: given the systems that are likely to be +- * installed on the reference device according to its speed class, T is +- * about the maximum time needed, under BFQ and while reading two files in +- * parallel, to load typical large applications on these systems. +- * In practice, the slower/faster the device at hand is, the more/less it +- * takes to load applications with respect to the reference device. +- * Accordingly, the longer/shorter BFQ grants weight raising to interactive +- * applications. ++ * In particular, R is the peak rate of the reference device (see ++ * below), and T is a reference time: given the systems that are ++ * likely to be installed on the reference device according to its ++ * speed class, T is about the maximum time needed, under BFQ and ++ * while reading two files in parallel, to load typical large ++ * applications on these systems (see the comments on ++ * max_service_from_wr below, for more details on how T is obtained). ++ * In practice, the slower/faster the device at hand is, the more/less ++ * it takes to load applications with respect to the reference device. ++ * Accordingly, the longer/shorter BFQ grants weight raising to ++ * interactive applications. + * + * BFQ uses four different reference pairs (R, T), depending on: + * . whether the device is rotational or non-rotational; +@@ -207,6 +209,60 @@ static int T_slow[2]; + static int T_fast[2]; + static int device_speed_thresh[2]; + ++/* ++ * BFQ uses the above-detailed, time-based weight-raising mechanism to ++ * privilege interactive tasks. This mechanism is vulnerable to the ++ * following false positives: I/O-bound applications that will go on ++ * doing I/O for much longer than the duration of weight ++ * raising. These applications have basically no benefit from being ++ * weight-raised at the beginning of their I/O. On the opposite end, ++ * while being weight-raised, these applications ++ * a) unjustly steal throughput to applications that may actually need ++ * low latency; ++ * b) make BFQ uselessly perform device idling; device idling results ++ * in loss of device throughput with most flash-based storage, and may ++ * increase latencies when used purposelessly. ++ * ++ * BFQ tries to reduce these problems, by adopting the following ++ * countermeasure. To introduce this countermeasure, we need first to ++ * finish explaining how the duration of weight-raising for ++ * interactive tasks is computed. ++ * ++ * For a bfq_queue deemed as interactive, the duration of weight ++ * raising is dynamically adjusted, as a function of the estimated ++ * peak rate of the device, so as to be equal to the time needed to ++ * execute the 'largest' interactive task we benchmarked so far. By ++ * largest task, we mean the task for which each involved process has ++ * to do more I/O than for any of the other tasks we benchmarked. This ++ * reference interactive task is the start-up of LibreOffice Writer, ++ * and in this task each process/bfq_queue needs to have at most ~110K ++ * sectors transferred. ++ * ++ * This last piece of information enables BFQ to reduce the actual ++ * duration of weight-raising for at least one class of I/O-bound ++ * applications: those doing sequential or quasi-sequential I/O. An ++ * example is file copy. In fact, once started, the main I/O-bound ++ * processes of these applications usually consume the above 110K ++ * sectors in much less time than the processes of an application that ++ * is starting, because these I/O-bound processes will greedily devote ++ * almost all their CPU cycles only to their target, ++ * throughput-friendly I/O operations. This is even more true if BFQ ++ * happens to be underestimating the device peak rate, and thus ++ * overestimating the duration of weight raising. But, according to ++ * our measurements, once transferred 110K sectors, these processes ++ * have no right to be weight-raised any longer. ++ * ++ * Basing on the last consideration, BFQ ends weight-raising for a ++ * bfq_queue if the latter happens to have received an amount of ++ * service at least equal to the following constant. The constant is ++ * set to slightly more than 110K, to have a minimum safety margin. ++ * ++ * This early ending of weight-raising reduces the amount of time ++ * during which interactive false positives cause the two problems ++ * described at the beginning of these comments. ++ */ ++static const unsigned long max_service_from_wr = 120000; ++ + #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +@@ -1361,6 +1417,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, + if (old_wr_coeff == 1 && wr_or_deserves_wr) { + /* start a weight-raising period */ + if (interactive) { ++ bfqq->service_from_wr = 0; + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + } else { +@@ -3980,6 +4037,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) + "back to interactive wr"); + } + } ++ if (bfqq->wr_coeff > 1 && ++ bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time && ++ bfqq->service_from_wr > max_service_from_wr) { ++ /* see comments on max_service_from_wr */ ++ bfq_bfqq_end_wr(bfqq); ++ bfq_log_bfqq(bfqd, bfqq, ++ "[%s] too much service", ++ __func__); ++ } + } + /* + * To improve latency (for this or other queues), immediately +diff --git a/block/bfq-mq.h b/block/bfq-mq.h +index 458099ee0308..9a5ce1168ff5 100644 +--- a/block/bfq-mq.h ++++ b/block/bfq-mq.h +@@ -331,6 +331,11 @@ struct bfq_queue { + * last transition from idle to backlogged. + */ + unsigned long service_from_backlogged; ++ /* ++ * Cumulative service received from the @bfq_queue since its ++ * last transition to weight-raised state. ++ */ ++ unsigned long service_from_wr; + /* + * Value of wr start time when switching to soft rt + */ +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 9d261dd428e4..4e6c5232e2fb 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -942,6 +942,9 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) + if (!bfqq->service_from_backlogged) + bfqq->first_IO_time = jiffies; + ++ if (bfqq->wr_coeff > 1) ++ bfqq->service_from_wr += served; ++ + bfqq->service_from_backlogged += served; + for_each_entity(entity) { + st = bfq_entity_service_tree(entity); +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index eff4c4edf5a0..486493aafaf8 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -156,15 +156,17 @@ static struct kmem_cache *bfq_pool; + * interactive applications automatically, using the following formula: + * duration = (R / r) * T, where r is the peak rate of the device, and + * R and T are two reference parameters. +- * In particular, R is the peak rate of the reference device (see below), +- * and T is a reference time: given the systems that are likely to be +- * installed on the reference device according to its speed class, T is +- * about the maximum time needed, under BFQ and while reading two files in +- * parallel, to load typical large applications on these systems. +- * In practice, the slower/faster the device at hand is, the more/less it +- * takes to load applications with respect to the reference device. +- * Accordingly, the longer/shorter BFQ grants weight raising to interactive +- * applications. ++ * In particular, R is the peak rate of the reference device (see ++ * below), and T is a reference time: given the systems that are ++ * likely to be installed on the reference device according to its ++ * speed class, T is about the maximum time needed, under BFQ and ++ * while reading two files in parallel, to load typical large ++ * applications on these systems (see the comments on ++ * max_service_from_wr below, for more details on how T is obtained). ++ * In practice, the slower/faster the device at hand is, the more/less ++ * it takes to load applications with respect to the reference device. ++ * Accordingly, the longer/shorter BFQ grants weight raising to ++ * interactive applications. + * + * BFQ uses four different reference pairs (R, T), depending on: + * . whether the device is rotational or non-rotational; +@@ -201,6 +203,60 @@ static int T_slow[2]; + static int T_fast[2]; + static int device_speed_thresh[2]; + ++/* ++ * BFQ uses the above-detailed, time-based weight-raising mechanism to ++ * privilege interactive tasks. This mechanism is vulnerable to the ++ * following false positives: I/O-bound applications that will go on ++ * doing I/O for much longer than the duration of weight ++ * raising. These applications have basically no benefit from being ++ * weight-raised at the beginning of their I/O. On the opposite end, ++ * while being weight-raised, these applications ++ * a) unjustly steal throughput to applications that may actually need ++ * low latency; ++ * b) make BFQ uselessly perform device idling; device idling results ++ * in loss of device throughput with most flash-based storage, and may ++ * increase latencies when used purposelessly. ++ * ++ * BFQ tries to reduce these problems, by adopting the following ++ * countermeasure. To introduce this countermeasure, we need first to ++ * finish explaining how the duration of weight-raising for ++ * interactive tasks is computed. ++ * ++ * For a bfq_queue deemed as interactive, the duration of weight ++ * raising is dynamically adjusted, as a function of the estimated ++ * peak rate of the device, so as to be equal to the time needed to ++ * execute the 'largest' interactive task we benchmarked so far. By ++ * largest task, we mean the task for which each involved process has ++ * to do more I/O than for any of the other tasks we benchmarked. This ++ * reference interactive task is the start-up of LibreOffice Writer, ++ * and in this task each process/bfq_queue needs to have at most ~110K ++ * sectors transfered. ++ * ++ * This last piece of information enables BFQ to reduce the actual ++ * duration of weight-raising for at least one class of I/O-bound ++ * applications: those doing sequential or quasi-sequential I/O. An ++ * example is file copy. In fact, once started, the main I/O-bound ++ * processes of these applications usually consume the above 110K ++ * sectors in much less time than the processes of an application that ++ * is starting, because these I/O-bound processes will greedily devote ++ * almost all their CPU cycles only to their target, ++ * throughput-friendly I/O operations. This is even more true if BFQ ++ * happens to be underestimating the device peak rate, and thus ++ * overestimating the duration of weight raising. But, according to ++ * our measurements, once transferred 110K sectors, these processes ++ * have no right to be weight-raised any longer. ++ * ++ * Basing on the last consideration, BFQ ends weight-raising for a ++ * bfq_queue if the latter happens to have received an amount of ++ * service at least equal to the following constant. The constant is ++ * set to slightly more than 110K, to have a minimum safety margin. ++ * ++ * This early ending of weight-raising reduces the amount of time ++ * during which interactive false positives cause the two problems ++ * described at the beginning of these comments. ++ */ ++static const unsigned long max_service_from_wr = 120000; ++ + #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +@@ -1246,6 +1302,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, + if (old_wr_coeff == 1 && wr_or_deserves_wr) { + /* start a weight-raising period */ + if (interactive) { ++ bfqq->service_from_wr = 0; + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + } else { +@@ -3794,6 +3851,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) + "back to interactive wr"); + } + } ++ if (bfqq->wr_coeff > 1 && ++ bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time && ++ bfqq->service_from_wr > max_service_from_wr) { ++ /* see comments on max_service_from_wr */ ++ bfq_bfqq_end_wr(bfqq); ++ bfq_log_bfqq(bfqd, bfqq, ++ "[%s] too much service", ++ __func__); ++ } + } + /* + * To improve latency (for this or other queues), immediately +diff --git a/block/bfq.h b/block/bfq.h +index 59539adc00a5..0cd7a3f251a7 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -323,6 +323,11 @@ struct bfq_queue { + * last transition from idle to backlogged. + */ + unsigned long service_from_backlogged; ++ /* ++ * Cumulative service received from the @bfq_queue since its ++ * last transition to weight-raised state. ++ */ ++ unsigned long service_from_wr; + /* + * Value of wr start time when switching to soft rt + */ + +From 59efebb94b2f9bac653faf62dadb45b83bd27fa7 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Thu, 4 Jan 2018 16:29:58 +0100 +Subject: [PATCH 12/23] bfq-sq, bfq-mq: put async queues for root bfq groups + too +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For each pair [device for which bfq is selected as I/O scheduler, +group in blkio/io], bfq maintains a corresponding bfq group. Each such +bfq group contains a set of async queues, with each async queue +created on demand, i.e., when some I/O request arrives for it. On +creation, an async queue gets an extra reference, to make sure that +the queue is not freed as long as its bfq group exists. Accordingly, +to allow the queue to be freed after the group exited, this extra +reference must released on group exit. + +The above holds also for a bfq root group, i.e., for the bfq group +corresponding to the root blkio/io root for a given device. Yet, by +mistake, the references to the existing async queues of a root group +are not released when the latter exits. This causes a memory leak when +the instance of bfq for a given device exits. In a similar vein, +bfqg_stats_xfer_dead is not executed for a root group. + +This commit fixes bfq_pd_offline so that the latter executes the above +missing operations for a root group too. + +Reported-by: Holger Hoffstätte +Reported-by: Guoqing Jiang +Signed-off-by: Davide Ferrari +Signed-off-by: Paolo Valente +--- + block/bfq-cgroup-included.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c +index 562b0ce581a7..45fefb2e2d57 100644 +--- a/block/bfq-cgroup-included.c ++++ b/block/bfq-cgroup-included.c +@@ -885,13 +885,13 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) + + entity = bfqg->my_entity; + +- if (!entity) /* root group */ +- return; +- + #ifdef BFQ_MQ + spin_lock_irqsave(&bfqd->lock, flags); + #endif + ++ if (!entity) /* root group */ ++ goto put_async_queues; ++ + /* + * Empty all service_trees belonging to this group before + * deactivating the group itself. +@@ -926,6 +926,8 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) + BUG_ON(bfqg->sched_data.in_service_entity); + + __bfq_deactivate_entity(entity, false); ++ ++put_async_queues: + bfq_put_async_queues(bfqd, bfqg); + + #ifdef BFQ_MQ + +From 2dfbaaaf95054e2da3ededc0deb1ba5a4f589e53 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Mon, 8 Jan 2018 19:38:45 +0100 +Subject: [PATCH 13/23] bfq-sq, bfq-mq: release oom-queue ref to root group on + exit + +On scheduler init, a reference to the root group, and a reference to +its corresponding blkg are taken for the oom queue. Yet these +references are not released on scheduler exit, which prevents these +objects from be freed. This commit adds the missing reference +releases. + +Reported-by: Davide Ferrari +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 3 +++ + block/bfq-sq-iosched.c | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 63fdd16dec3c..b82c52fabf91 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -5507,6 +5507,9 @@ static void bfq_exit_queue(struct elevator_queue *e) + + BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + ++ /* release oom-queue reference to root group */ ++ bfqg_and_blkg_put(bfqd->root_group); ++ + #ifdef BFQ_GROUP_IOSCHED_ENABLED + blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); + #else +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index 486493aafaf8..851af055664d 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -5052,6 +5052,9 @@ static void bfq_exit_queue(struct elevator_queue *e) + + BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + ++ /* release oom-queue reference to root group */ ++ bfqg_put(bfqd->root_group); ++ + #ifdef BFQ_GROUP_IOSCHED_ENABLED + blkcg_deactivate_policy(q, &blkcg_policy_bfq); + #else + +From 13efe00c8292d78d223e1090a7f36426e360eb38 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Mon, 8 Jan 2018 19:40:38 +0100 +Subject: [PATCH 14/23] block, bfq-sq, bfq-mq: trace get and put of bfq groups + +Signed-off-by: Paolo Valente +--- + block/bfq-cgroup-included.c | 15 +++++++++++++++ + block/bfq-mq-iosched.c | 3 ++- + 2 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c +index 45fefb2e2d57..f94743fb2e7d 100644 +--- a/block/bfq-cgroup-included.c ++++ b/block/bfq-cgroup-included.c +@@ -267,6 +267,8 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) + + static void bfqg_get(struct bfq_group *bfqg) + { ++ trace_printk("bfqg %p\n", bfqg); ++ + #ifdef BFQ_MQ + bfqg->ref++; + #else +@@ -280,6 +282,9 @@ static void bfqg_put(struct bfq_group *bfqg) + bfqg->ref--; + + BUG_ON(bfqg->ref < 0); ++ trace_printk("putting bfqg %p %s\n", bfqg, ++ bfqg->ref == 0 ? "and freeing it" : ""); ++ + if (bfqg->ref == 0) + kfree(bfqg); + #else +@@ -293,6 +298,7 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) + /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ + bfqg_get(bfqg); + ++ trace_printk("getting blkg for bfqg %p\n", bfqg); + blkg_get(bfqg_to_blkg(bfqg)); + } + +@@ -300,6 +306,7 @@ static void bfqg_and_blkg_put(struct bfq_group *bfqg) + { + bfqg_put(bfqg); + ++ trace_printk("putting blkg for bfqg %p\n", bfqg); + blkg_put(bfqg_to_blkg(bfqg)); + } + #endif +@@ -382,6 +389,8 @@ static void bfq_init_entity(struct bfq_entity *entity, + * Make sure that bfqg and its associated blkg do not + * disappear before entity. + */ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting bfqg %p and blkg\n", __func__, bfqg); ++ + bfqg_and_blkg_get(bfqg); + #else + bfqg_get(bfqg); +@@ -475,6 +484,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) + kfree(bfqg); + return NULL; + } ++ trace_printk("bfqg %p\n", bfqg); + + #ifdef BFQ_MQ + /* see comments in bfq_bic_update_cgroup for why refcounting */ +@@ -513,6 +523,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) + static void bfq_pd_free(struct blkg_policy_data *pd) + { + struct bfq_group *bfqg = pd_to_bfqg(pd); ++ trace_printk("bfqg %p\n", bfqg); + + bfqg_stats_exit(&bfqg->stats); + #ifdef BFQ_MQ +@@ -650,6 +661,8 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + } + #ifdef BFQ_MQ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); ++ + bfqg_and_blkg_put(bfqq_group(bfqq)); + #else + bfqg_put(bfqq_group(bfqq)); +@@ -658,6 +671,8 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; + #ifdef BFQ_MQ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting blkg and bfqg %p\n", __func__, bfqg); ++ + /* pin down bfqg and its associated blkg */ + bfqg_and_blkg_get(bfqg); + #else +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index b82c52fabf91..d5b7a6b985d7 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -4385,10 +4385,11 @@ static void bfq_put_queue(struct bfq_queue *bfqq) + if (bfqq->bfqd) + bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); + +- kmem_cache_free(bfq_pool, bfqq); + #ifdef BFQ_GROUP_IOSCHED_ENABLED ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); + bfqg_and_blkg_put(bfqg); + #endif ++ kmem_cache_free(bfq_pool, bfqq); + } + + static void bfq_put_cooperator(struct bfq_queue *bfqq) + +From 816b77fba966171974eb5ee25d81bc4e19eaf1b4 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Wed, 10 Jan 2018 09:08:22 +0100 +Subject: [PATCH 15/23] bfq-sq, bfq-mq: compile group put for oom queue only if + BFQ_GROUP_IOSCHED is set + +Commit ("bfq-sq, bfq-mq: release oom-queue ref to root group on exit") +added a missing put of the root bfq group for the oom queue. That put +has to be, and can be, performed only if CONFIG_BFQ_GROUP_IOSCHED is +defined: the function doing the put is even not defined at all if +CONFIG_BFQ_GROUP_IOSCHED is not defined. But that commit makes that +put be invoked regardless of whether CONFIG_BFQ_GROUP_IOSCHED is +defined. This commit fixes this mistake, by making that invocation be +compiled only if CONFIG_BFQ_GROUP_IOSCHED is actually defined. + +Fixes ("block, bfq: release oom-queue ref to root group on exit") +Reported-by: Jan Alexander Steffens +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 2 +- + block/bfq-sq-iosched.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index d5b7a6b985d7..2581fe0f6f2f 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -5508,10 +5508,10 @@ static void bfq_exit_queue(struct elevator_queue *e) + + BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + ++#ifdef BFQ_GROUP_IOSCHED_ENABLED + /* release oom-queue reference to root group */ + bfqg_and_blkg_put(bfqd->root_group); + +-#ifdef BFQ_GROUP_IOSCHED_ENABLED + blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); + #else + spin_lock_irq(&bfqd->lock); +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index 851af055664d..c4df156b1fb4 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -5052,10 +5052,10 @@ static void bfq_exit_queue(struct elevator_queue *e) + + BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + ++#ifdef BFQ_GROUP_IOSCHED_ENABLED + /* release oom-queue reference to root group */ + bfqg_put(bfqd->root_group); + +-#ifdef BFQ_GROUP_IOSCHED_ENABLED + blkcg_deactivate_policy(q, &blkcg_policy_bfq); + #else + bfq_put_async_queues(bfqd, bfqd->root_group); + +From 643a89c659172b2c9ae16adfe03af4e3e88e1326 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Sat, 13 Jan 2018 18:48:41 +0100 +Subject: [PATCH 16/23] block, bfq-sq, bfq-mq: remove trace_printks + +Commit ("block, bfq-sq, bfq-mq: trace get and put of bfq groups") +unwisely added some invocations of the function trace_printk, which +is inappropriate in production kernels. This commit removes those +invocations. + +Signed-off-by: Paolo Valente +--- + block/bfq-cgroup-included.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c +index f94743fb2e7d..a4f8a03edfc9 100644 +--- a/block/bfq-cgroup-included.c ++++ b/block/bfq-cgroup-included.c +@@ -267,8 +267,6 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) + + static void bfqg_get(struct bfq_group *bfqg) + { +- trace_printk("bfqg %p\n", bfqg); +- + #ifdef BFQ_MQ + bfqg->ref++; + #else +@@ -282,9 +280,6 @@ static void bfqg_put(struct bfq_group *bfqg) + bfqg->ref--; + + BUG_ON(bfqg->ref < 0); +- trace_printk("putting bfqg %p %s\n", bfqg, +- bfqg->ref == 0 ? "and freeing it" : ""); +- + if (bfqg->ref == 0) + kfree(bfqg); + #else +@@ -298,7 +293,6 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) + /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ + bfqg_get(bfqg); + +- trace_printk("getting blkg for bfqg %p\n", bfqg); + blkg_get(bfqg_to_blkg(bfqg)); + } + +@@ -306,7 +300,6 @@ static void bfqg_and_blkg_put(struct bfq_group *bfqg) + { + bfqg_put(bfqg); + +- trace_printk("putting blkg for bfqg %p\n", bfqg); + blkg_put(bfqg_to_blkg(bfqg)); + } + #endif +@@ -484,8 +477,6 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) + kfree(bfqg); + return NULL; + } +- trace_printk("bfqg %p\n", bfqg); +- + #ifdef BFQ_MQ + /* see comments in bfq_bic_update_cgroup for why refcounting */ + bfqg_get(bfqg); +@@ -523,7 +514,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd) + static void bfq_pd_free(struct blkg_policy_data *pd) + { + struct bfq_group *bfqg = pd_to_bfqg(pd); +- trace_printk("bfqg %p\n", bfqg); + + bfqg_stats_exit(&bfqg->stats); + #ifdef BFQ_MQ + +From ce050275e24fecec800f346c09d9494563e9fc8a Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Mon, 15 Jan 2018 15:07:05 +0100 +Subject: [PATCH 17/23] block, bfq-mq: add requeue-request hook + +Commit 'a6a252e64914 ("blk-mq-sched: decide how to handle flush rq via +RQF_FLUSH_SEQ")' makes all non-flush re-prepared requests for a device +be re-inserted into the active I/O scheduler for that device. As a +consequence, I/O schedulers may get the same request inserted again, +even several times, without a finish_request invoked on that request +before each re-insertion. + +This fact is the cause of the failure reported in [1]. For an I/O +scheduler, every re-insertion of the same re-prepared request is +equivalent to the insertion of a new request. For schedulers like +mq-deadline or kyber, this fact causes no harm. In contrast, it +confuses a stateful scheduler like BFQ, which keeps state for an I/O +request, until the finish_request hook is invoked on the request. In +particular, BFQ may get stuck, waiting forever for the number of +request dispatches, of the same request, to be balanced by an equal +number of request completions (while there will be one completion for +that request). In this state, BFQ may refuse to serve I/O requests +from other bfq_queues. The hang reported in [1] then follows. + +However, the above re-prepared requests undergo a requeue, thus the +requeue_request hook of the active elevator is invoked for these +requests, if set. This commit then addresses the above issue by +properly implementing the hook requeue_request in BFQ. + +[1] https://marc.info/?l=linux-block&m=151211117608676 + +Reported-by: Ivan Kozik +Reported-by: Alban Browaeys +Signed-off-by: Paolo Valente +Signed-off-by: Serena Ziviani +--- + block/bfq-mq-iosched.c | 90 ++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 73 insertions(+), 17 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 2581fe0f6f2f..bb7ccc2f1165 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -4162,9 +4162,9 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + * TESTING: reset DISP_LIST flag, because: 1) + * this rq this request has passed through + * bfq_prepare_request, 2) then it will have +- * bfq_finish_request invoked on it, and 3) in +- * bfq_finish_request we use this flag to check +- * that bfq_finish_request is not invoked on ++ * bfq_finish_requeue_request invoked on it, and 3) in ++ * bfq_finish_requeue_request we use this flag to check ++ * that bfq_finish_requeue_request is not invoked on + * requests for which bfq_prepare_request has + * been invoked. + */ +@@ -4173,8 +4173,8 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + } + + /* +- * We exploit the bfq_finish_request hook to decrement +- * rq_in_driver, but bfq_finish_request will not be ++ * We exploit the bfq_finish_requeue_request hook to decrement ++ * rq_in_driver, but bfq_finish_requeue_request will not be + * invoked on this request. So, to avoid unbalance, + * just start this request, without incrementing + * rq_in_driver. As a negative consequence, +@@ -4183,10 +4183,10 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + * bfq_schedule_dispatch to be invoked uselessly. + * + * As for implementing an exact solution, the +- * bfq_finish_request hook, if defined, is probably ++ * bfq_finish_requeue_request hook, if defined, is probably + * invoked also on this request. So, by exploiting + * this hook, we could 1) increment rq_in_driver here, +- * and 2) decrement it in bfq_finish_request. Such a ++ * and 2) decrement it in bfq_finish_requeue_request. Such a + * solution would let the value of the counter be + * always accurate, but it would entail using an extra + * interface function. This cost seems higher than the +@@ -4878,6 +4878,8 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) + return idle_timer_disabled; + } + ++static void bfq_prepare_request(struct request *rq, struct bio *bio); ++ + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head) + { +@@ -4919,6 +4921,20 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + BUG_ON(!(rq->rq_flags & RQF_GOT)); + rq->rq_flags &= ~RQF_GOT; + ++ if (!bfqq) { ++ /* ++ * This should never happen. Most likely rq is ++ * a requeued regular request, being ++ * re-inserted without being first ++ * re-prepared. Do a prepare, to avoid ++ * failure. ++ */ ++ pr_warn("Regular request associated with no queue"); ++ WARN_ON(1); ++ bfq_prepare_request(rq, rq->bio); ++ bfqq = RQ_BFQQ(rq); ++ } ++ + #if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) + idle_timer_disabled = __bfq_insert_request(bfqd, rq); + /* +@@ -5110,7 +5126,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) + } + } + +-static void bfq_finish_request_body(struct bfq_queue *bfqq) ++static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "put_request_body: allocated %d", bfqq->allocated); +@@ -5120,7 +5136,13 @@ static void bfq_finish_request_body(struct bfq_queue *bfqq) + bfq_put_queue(bfqq); + } + +-static void bfq_finish_request(struct request *rq) ++/* ++ * Handle either a requeue or a finish for rq. The things to do are ++ * the same in both cases: all references to rq are to be dropped. In ++ * particular, rq is considered completed from the point of view of ++ * the scheduler. ++ */ ++static void bfq_finish_requeue_request(struct request *rq) + { + struct bfq_queue *bfqq; + struct bfq_data *bfqd; +@@ -5128,11 +5150,27 @@ static void bfq_finish_request(struct request *rq) + + BUG_ON(!rq); + +- if (!rq->elv.icq) ++ bfqq = RQ_BFQQ(rq); ++ ++ /* ++ * Requeue and finish hooks are invoked in blk-mq without ++ * checking whether the involved request is actually still ++ * referenced in the scheduler. To handle this fact, the ++ * following two checks make this function exit in case of ++ * spurious invocations, for which there is nothing to do. ++ * ++ * First, check whether rq has nothing to do with an elevator. ++ */ ++ if (unlikely(!(rq->rq_flags & RQF_ELVPRIV))) + return; + +- bfqq = RQ_BFQQ(rq); +- BUG_ON(!bfqq); ++ /* ++ * rq either is not associated with any icq, or is an already ++ * requeued request that has not (yet) been re-inserted into ++ * a bfq_queue. ++ */ ++ if (!rq->elv.icq || !bfqq) ++ return; + + bic = RQ_BIC(rq); + BUG_ON(!bic); +@@ -5145,7 +5183,6 @@ static void bfq_finish_request(struct request *rq) + BUG(); + } + BUG_ON(rq->rq_flags & RQF_QUEUED); +- BUG_ON(!(rq->rq_flags & RQF_ELVPRIV)); + + bfq_log_bfqq(bfqd, bfqq, + "putting rq %p with %u sects left, STARTED %d", +@@ -5166,13 +5203,14 @@ static void bfq_finish_request(struct request *rq) + spin_lock_irqsave(&bfqd->lock, flags); + + bfq_completed_request(bfqq, bfqd); +- bfq_finish_request_body(bfqq); ++ bfq_finish_requeue_request_body(bfqq); + + spin_unlock_irqrestore(&bfqd->lock, flags); + } else { + /* + * Request rq may be still/already in the scheduler, +- * in which case we need to remove it. And we cannot ++ * in which case we need to remove it (this should ++ * never happen in case of requeue). And we cannot + * defer such a check and removal, to avoid + * inconsistencies in the time interval from the end + * of this function to the start of the deferred work. +@@ -5189,9 +5227,26 @@ static void bfq_finish_request(struct request *rq) + bfqg_stats_update_io_remove(bfqq_group(bfqq), + rq->cmd_flags); + } +- bfq_finish_request_body(bfqq); ++ bfq_finish_requeue_request_body(bfqq); + } + ++ /* ++ * Reset private fields. In case of a requeue, this allows ++ * this function to correctly do nothing if it is spuriously ++ * invoked again on this same request (see the check at the ++ * beginning of the function). Probably, a better general ++ * design would be to prevent blk-mq from invoking the requeue ++ * or finish hooks of an elevator, for a request that is not ++ * referred by that elevator. ++ * ++ * Resetting the following fields would break the ++ * request-insertion logic if rq is re-inserted into a bfq ++ * internal queue, without a re-preparation. Here we assume ++ * that re-insertions of requeued requests, without ++ * re-preparation, can happen only for pass_through or at_head ++ * requests (which are not re-inserted into bfq internal ++ * queues). ++ */ + rq->elv.priv[0] = NULL; + rq->elv.priv[1] = NULL; + } +@@ -5960,7 +6015,8 @@ static struct elevator_type iosched_bfq_mq = { + .ops.mq = { + .limit_depth = bfq_limit_depth, + .prepare_request = bfq_prepare_request, +- .finish_request = bfq_finish_request, ++ .requeue_request = bfq_finish_requeue_request, ++ .finish_request = bfq_finish_requeue_request, + .exit_icq = bfq_exit_icq, + .insert_requests = bfq_insert_requests, + .dispatch_request = bfq_dispatch_request, + +From 3e4f292191cc62b3844316b9741534c3f1b36f0a Mon Sep 17 00:00:00 2001 +From: Davide Paganelli +Date: Thu, 8 Feb 2018 12:19:24 +0100 +Subject: [PATCH 18/23] block, bfq-mq, bfq-sq: make log functions print names + of calling functions + +Add the macro __func__ as a parameter to the invocations of the functions +pr_crit, blk_add_trace_msg and blk_add_cgroup_trace_msg in bfq_log* +functions, in order to include automatically in the log messages +the names of the functions that call the log functions. +The programmer can then avoid doing it. + +Signed-off-by: Davide Paganelli +Signed-off-by: Paolo Valente +--- + block/bfq-cgroup-included.c | 9 +-- + block/bfq-mq-iosched.c | 167 ++++++++++++++++++++++---------------------- + block/bfq-mq.h | 33 ++++----- + block/bfq-sched.c | 54 +++++++------- + block/bfq-sq-iosched.c | 134 +++++++++++++++++------------------ + block/bfq.h | 33 ++++----- + 6 files changed, 214 insertions(+), 216 deletions(-) + +diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c +index a4f8a03edfc9..613f154e9da5 100644 +--- a/block/bfq-cgroup-included.c ++++ b/block/bfq-cgroup-included.c +@@ -382,7 +382,8 @@ static void bfq_init_entity(struct bfq_entity *entity, + * Make sure that bfqg and its associated blkg do not + * disappear before entity. + */ +- bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting bfqg %p and blkg\n", __func__, bfqg); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "getting bfqg %p and blkg\n", ++ bfqg); + + bfqg_and_blkg_get(bfqg); + #else +@@ -651,7 +652,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + } + #ifdef BFQ_MQ +- bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg); + + bfqg_and_blkg_put(bfqq_group(bfqq)); + #else +@@ -661,7 +662,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; + #ifdef BFQ_MQ +- bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting blkg and bfqg %p\n", __func__, bfqg); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "getting blkg and bfqg %p\n", bfqg); + + /* pin down bfqg and its associated blkg */ + bfqg_and_blkg_get(bfqg); +@@ -721,7 +722,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, + if (entity->sched_data != &bfqg->sched_data) { + bic_set_bfqq(bic, NULL, 0); + bfq_log_bfqq(bfqd, async_bfqq, +- "bic_change_group: %p %d", ++ "%p %d", + async_bfqq, + async_bfqq->ref); + bfq_put_queue(async_bfqq); +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index bb7ccc2f1165..edc93b6af186 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -310,7 +310,7 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, + static void bfq_schedule_dispatch(struct bfq_data *bfqd) + { + if (bfqd->queued != 0) { +- bfq_log(bfqd, "schedule dispatch"); ++ bfq_log(bfqd, ""); + blk_mq_run_hw_queues(bfqd->queue, true); + } + } +@@ -489,8 +489,8 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) + data->shallow_depth = + bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; + +- bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", +- __func__, bfqd->wr_busy_queues, op_is_sync(op), ++ bfq_log(bfqd, "wr_busy %d sync %d depth %u", ++ bfqd->wr_busy_queues, op_is_sync(op), + data->shallow_depth); + } + +@@ -528,7 +528,7 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + if (rb_link) + *rb_link = p; + +- bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", ++ bfq_log(bfqd, "%llu: returning %d", + (unsigned long long) sector, + bfqq ? bfqq->pid : 0); + +@@ -749,7 +749,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq, + if (rq == last || ktime_get_ns() < rq->fifo_time) + return NULL; + +- bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq); + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); + return rq; + } +@@ -842,7 +842,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, + bfq_serv_to_charge(next_rq, bfqq)); + if (entity->budget != new_budget) { + entity->budget = new_budget; +- bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", ++ bfq_log_bfqq(bfqd, bfqq, "new budget %lu", + new_budget); + bfq_requeue_bfqq(bfqd, bfqq, false); + } +@@ -915,8 +915,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, + BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "[%s] bic %p wr_coeff %d start_finish %lu max_time %lu", +- __func__, ++ "bic %p wr_coeff %d start_finish %lu max_time %lu", + bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, + bfqq->wr_cur_max_time); + +@@ -929,11 +928,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, + bfq_wr_duration(bfqd))) { + switch_back_to_interactive_wr(bfqq, bfqd); + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "resume state: switching back to interactive"); ++ "switching back to interactive"); + } else { + bfqq->wr_coeff = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "resume state: switching off wr (%lu + %lu < %lu)", ++ "switching off wr (%lu + %lu < %lu)", + bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, + jiffies); + } +@@ -985,7 +984,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + /* Increment burst size to take into account also bfqq */ + bfqd->burst_size++; + +- bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size); ++ bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size); + + BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh); + +@@ -998,7 +997,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + * other to consider this burst as large. + */ + bfqd->large_burst = true; +- bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started"); ++ bfq_log_bfqq(bfqd, bfqq, "large burst started"); + + /* + * We can now mark all queues in the burst list as +@@ -1170,7 +1169,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfqd->large_burst = false; + bfq_reset_burst_list(bfqd, bfqq); + bfq_log_bfqq(bfqd, bfqq, +- "handle_burst: late activation or different group"); ++ "late activation or different group"); + goto end; + } + +@@ -1180,7 +1179,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + * bfqq as belonging to this large burst immediately. + */ + if (bfqd->large_burst) { +- bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst"); ++ bfq_log_bfqq(bfqd, bfqq, "marked in burst"); + bfq_mark_bfqq_in_large_burst(bfqq); + goto end; + } +@@ -1686,7 +1685,7 @@ static void bfq_add_request(struct request *rq) + unsigned int old_wr_coeff = bfqq->wr_coeff; + bool interactive = false; + +- bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s", ++ bfq_log_bfqq(bfqd, bfqq, "size %u %s", + blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A"); + + if (bfqq->wr_coeff > 1) /* queue is being weight-raised */ +@@ -1952,7 +1951,7 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, + __rq = bfq_find_rq_fmerge(bfqd, bio, q); + if (__rq && elv_bio_merge_ok(__rq, bio)) { + *req = __rq; +- bfq_log(bfqd, "request_merge: req %p", __rq); ++ bfq_log(bfqd, "req %p", __rq); + + return ELEVATOR_FRONT_MERGE; + } +@@ -1989,7 +1988,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, + bfqq->next_rq = next_rq; + + bfq_log_bfqq(bfqd, bfqq, +- "request_merged: req %p prev %p next_rq %p bfqq %p", ++ "req %p prev %p next_rq %p bfqq %p", + req, prev, next_rq, bfqq); + + /* +@@ -2018,7 +2017,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, + goto end; + + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "requests_merged: rq %p next %p bfqq %p next_bfqq %p", ++ "rq %p next %p bfqq %p next_bfqq %p", + rq, next, bfqq, next_bfqq); + + spin_lock_irq(&bfqq->bfqd->lock); +@@ -2069,10 +2068,10 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) + */ + bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "end_wr: wrais ending at %lu, rais_max_time %u", ++ "wrais ending at %lu, rais_max_time %u", + bfqq->last_wr_start_finish, + jiffies_to_msecs(bfqq->wr_cur_max_time)); +- bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d", ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d", + bfqq->bfqd->wr_busy_queues); + } + +@@ -2245,8 +2244,8 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, + { + if (bfq_too_late_for_merging(new_bfqq)) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "[%s] too late for bfq%d to be merged", +- __func__, new_bfqq->pid); ++ "too late for bfq%d to be merged", ++ new_bfqq->pid); + return false; + } + +@@ -2395,8 +2394,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) + } + BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "[%s] bic %p wr_coeff %d start_finish %lu max_time %lu", +- __func__, ++ "bic %p wr_coeff %d start_finish %lu max_time %lu", + bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, + bfqq->wr_cur_max_time); + } +@@ -2453,7 +2451,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, + + } + +- bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", ++ bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d", + bfqd->wr_busy_queues); + + /* +@@ -2554,7 +2552,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd, + bfqq->budget_timeout = jiffies + + bfqd->bfq_timeout * timeout_coeff; + +- bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", ++ bfq_log_bfqq(bfqd, bfqq, "%u", + jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); + } + +@@ -2620,10 +2618,10 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, + + bfq_set_budget_timeout(bfqd, bfqq); + bfq_log_bfqq(bfqd, bfqq, +- "set_in_service_queue, cur-budget = %d", ++ "cur-budget = %d", + bfqq->entity.budget); + } else +- bfq_log(bfqd, "set_in_service_queue: NULL"); ++ bfq_log(bfqd, "NULL"); + + bfqd->in_service_queue = bfqq; + } +@@ -2746,7 +2744,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq + bfqd->peak_rate_samples = 0; /* full re-init on next disp. */ + + bfq_log(bfqd, +- "reset_rate_computation at end, sample %u/%u tot_sects %llu", ++ "at end, sample %u/%u tot_sects %llu", + bfqd->peak_rate_samples, bfqd->sequential_samples, + bfqd->tot_sectors_dispatched); + } +@@ -2766,7 +2764,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || + bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { + bfq_log(bfqd, +- "update_rate_reset: only resetting, delta_first %lluus samples %d", ++ "only resetting, delta_first %lluus samples %d", + bfqd->delta_from_first>>10, bfqd->peak_rate_samples); + goto reset_computation; + } +@@ -2790,7 +2788,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + div_u64(bfqd->delta_from_first, NSEC_PER_USEC)); + + bfq_log(bfqd, +-"update_rate_reset: tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", ++"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", + bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + rate > 20<peak_rate) || + rate > 20<peak_rate_samples, bfqd->sequential_samples, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); + goto reset_computation; + } else { + bfq_log(bfqd, +- "update_rate_reset: do update, samples %u/%u rate/peak %llu/%llu", ++ "do update, samples %u/%u rate/peak %llu/%llu", + bfqd->peak_rate_samples, bfqd->sequential_samples, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); +@@ -2868,7 +2866,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + rate /= divisor; /* smoothing constant alpha = 1/divisor */ + + bfq_log(bfqd, +- "update_rate_reset: divisor %d tmp_peak_rate %llu tmp_rate %u", ++ "divisor %d tmp_peak_rate %llu tmp_rate %u", + divisor, + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), + (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); +@@ -2922,7 +2920,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + + if (bfqd->peak_rate_samples == 0) { /* first dispatch */ + bfq_log(bfqd, +- "update_peak_rate: goto reset, samples %d", ++ "goto reset, samples %d", + bfqd->peak_rate_samples) ; + bfq_reset_rate_computation(bfqd, rq); + goto update_last_values; /* will add one sample */ +@@ -2943,7 +2941,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && + bfqd->rq_in_driver == 0) { + bfq_log(bfqd, +-"update_peak_rate: jumping to updating&resetting delta_last %lluus samples %d", ++"jumping to updating&resetting delta_last %lluus samples %d", + (now_ns - bfqd->last_dispatch)>>10, + bfqd->peak_rate_samples) ; + goto update_rate_and_reset; +@@ -2969,7 +2967,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + bfqd->delta_from_first = now_ns - bfqd->first_dispatch; + + bfq_log(bfqd, +- "update_peak_rate: added samples %u/%u tot_sects %llu delta_first %lluus", ++ "added samples %u/%u tot_sects %llu delta_first %lluus", + bfqd->peak_rate_samples, bfqd->sequential_samples, + bfqd->tot_sectors_dispatched, + bfqd->delta_from_first>>10); +@@ -2985,12 +2983,12 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + bfqd->last_dispatch = now_ns; + + bfq_log(bfqd, +- "update_peak_rate: delta_first %lluus last_pos %llu peak_rate %llu", ++ "delta_first %lluus last_pos %llu peak_rate %llu", + (now_ns - bfqd->first_dispatch)>>10, + (unsigned long long) bfqd->last_position, + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); + bfq_log(bfqd, +- "update_peak_rate: samples at end %d", bfqd->peak_rate_samples); ++ "samples at end %d", bfqd->peak_rate_samples); + } + + /* +@@ -3088,11 +3086,11 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, + */ + budget = 2 * min_budget; + +- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", ++ bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d", + bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); +- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", ++ bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d", + budget, bfq_min_budget(bfqd)); +- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", ++ bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d", + bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); + + if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { +@@ -3294,7 +3292,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + else /* charge at least one seek */ + *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; + +- bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs); ++ bfq_log(bfqd, "too short %u", delta_usecs); + + return slow; + } +@@ -3317,11 +3315,11 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * peak rate. + */ + slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; +- bfq_log(bfqd, "bfq_bfqq_is_slow: relative rate %d/%d", ++ bfq_log(bfqd, "relative rate %d/%d", + bfqq->entity.service, bfqd->bfq_max_budget); + } + +- bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); ++ bfq_log_bfqq(bfqd, bfqq, "slow %d", slow); + + return slow; + } +@@ -3423,7 +3421,7 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqd, bfqq, +-"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u", ++"service_blkg %lu soft_rate %u sects/sec interval %u", + bfqq->service_from_backlogged, + bfqd->bfq_wr_max_softrt_rate, + jiffies_to_msecs(HZ * bfqq->service_from_backlogged / +@@ -3602,7 +3600,7 @@ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) + static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "may_budget_timeout: wait_request %d left %d timeout %d", ++ "wait_request %d left %d timeout %d", + bfq_bfqq_wait_request(bfqq), + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, + bfq_bfqq_budget_timeout(bfqq)); +@@ -3863,11 +3861,11 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) + * either boosts the throughput (without issues), or is + * necessary to preserve service guarantees. + */ +- bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", ++ bfq_log_bfqq(bfqd, bfqq, "sync %d idling_boosts_thr %d", + bfq_bfqq_sync(bfqq), idling_boosts_thr); + + bfq_log_bfqq(bfqd, bfqq, +- "may_idle: wr_busy %d boosts %d IO-bound %d guar %d", ++ "wr_busy %d boosts %d IO-bound %d guar %d", + bfqd->wr_busy_queues, + idling_boosts_thr_without_issues, + bfq_bfqq_IO_bound(bfqq), +@@ -3907,7 +3905,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + if (!bfqq) + goto new_queue; + +- bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); ++ bfq_log_bfqq(bfqd, bfqq, "already in-service queue"); + + if (bfq_may_expire_for_budg_timeout(bfqq) && + !bfq_bfqq_wait_request(bfqq) && +@@ -3983,14 +3981,14 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + new_queue: + bfqq = bfq_set_in_service_queue(bfqd); + if (bfqq) { +- bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); ++ bfq_log_bfqq(bfqd, bfqq, "checking new queue"); + goto check_queue; + } + keep_queue: + if (bfqq) +- bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); ++ bfq_log_bfqq(bfqd, bfqq, "returned this queue"); + else +- bfq_log(bfqd, "select_queue: no queue returned"); ++ bfq_log(bfqd, "no queue returned"); + + return bfqq; + } +@@ -4043,8 +4041,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) + /* see comments on max_service_from_wr */ + bfq_bfqq_end_wr(bfqq); + bfq_log_bfqq(bfqd, bfqq, +- "[%s] too much service", +- __func__); ++ "too much service"); + } + } + /* +@@ -4122,7 +4119,7 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) + { + struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + +- bfq_log(bfqd, "has_work, dispatch_non_empty %d busy_queues %d", ++ bfq_log(bfqd, "dispatch_non_empty %d busy_queues %d", + !list_empty_careful(&bfqd->dispatch), bfqd->busy_queues > 0); + + /* +@@ -4146,7 +4143,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + rq->rq_flags &= ~RQF_DISP_LIST; + + bfq_log(bfqd, +- "dispatch requests: picked %p from dispatch list", rq); ++ "picked %p from dispatch list", rq); + bfqq = RQ_BFQQ(rq); + + if (bfqq) { +@@ -4196,7 +4193,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + goto start_rq; + } + +- bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); ++ bfq_log(bfqd, "%d busy queues", bfqd->busy_queues); + + if (bfqd->busy_queues == 0) + goto exit; +@@ -4236,13 +4233,13 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + rq->rq_flags |= RQF_STARTED; + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, +- "dispatched %s request %p, rq_in_driver %d", ++ "%s request %p, rq_in_driver %d", + bfq_bfqq_sync(bfqq) ? "sync" : "async", + rq, + bfqd->rq_in_driver); + else + bfq_log(bfqd, +- "dispatched request %p from dispatch list, rq_in_driver %d", ++ "request %p from dispatch list, rq_in_driver %d", + rq, bfqd->rq_in_driver); + } else + bfq_log(bfqd, +@@ -4339,7 +4336,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) + BUG_ON(bfqq->ref <= 0); + + if (bfqq->bfqd) +- bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); + + bfqq->ref--; + if (bfqq->ref) +@@ -4383,10 +4380,10 @@ static void bfq_put_queue(struct bfq_queue *bfqq) + } + + if (bfqq->bfqd) +- bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq); + + #ifdef BFQ_GROUP_IOSCHED_ENABLED +- bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg); + bfqg_and_blkg_put(bfqg); + #endif + kmem_cache_free(bfq_pool, bfqq); +@@ -4418,7 +4415,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfq_schedule_dispatch(bfqd); + } + +- bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref); + + bfq_put_cooperator(bfqq); + +@@ -4502,7 +4499,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, + bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); + bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "set_next_ioprio_data: bic_class %d prio %d class %d", ++ "bic_class %d prio %d class %d", + ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class); + } + +@@ -4529,7 +4526,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) + bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); + bic_set_bfqq(bic, bfqq, false); + bfq_log_bfqq(bfqd, bfqq, +- "check_ioprio_change: bfqq %p %d", ++ "bfqq %p %d", + bfqq, bfqq->ref); + } + +@@ -4667,14 +4664,14 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + * guarantee that this queue is not freed + * until its group goes away. + */ +- bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", ++ bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d", + bfqq, bfqq->ref); + *async_bfqq = bfqq; + } + + out: + bfqq->ref++; /* get a process reference to this queue */ +- bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref); + rcu_read_unlock(); + return bfqq; + } +@@ -4733,7 +4730,7 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, + bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle)) + has_short_ttime = false; + +- bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", ++ bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d", + has_short_ttime); + + if (has_short_ttime) +@@ -4759,7 +4756,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bfq_update_io_seektime(bfqd, bfqq, rq); + + bfq_log_bfqq(bfqd, bfqq, +- "rq_enqueued: has_short_ttime=%d (seeky %d)", ++ "has_short_ttime=%d (seeky %d)", + bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); + + bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); +@@ -4818,7 +4815,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) + + assert_spin_locked(&bfqd->lock); + +- bfq_log_bfqq(bfqd, bfqq, "__insert_req: rq %p bfqq %p", rq, bfqq); ++ bfq_log_bfqq(bfqd, bfqq, "rq %p bfqq %p", rq, bfqq); + + /* + * An unplug may trigger a requeue of a request from the device +@@ -4837,9 +4834,9 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) + new_bfqq->allocated++; + bfqq->allocated--; + bfq_log_bfqq(bfqd, bfqq, +- "insert_request: new allocated %d", bfqq->allocated); ++ "new allocated %d", bfqq->allocated); + bfq_log_bfqq(bfqd, new_bfqq, +- "insert_request: new_bfqq new allocated %d", ++ "new_bfqq new allocated %d", + bfqq->allocated); + + new_bfqq->ref++; +@@ -4911,11 +4908,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + rq->rq_flags |= RQF_DISP_LIST; + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, +- "insert_request %p in disp: at_head %d", ++ "%p in disp: at_head %d", + rq, at_head); + else + bfq_log(bfqd, +- "insert_request %p in disp: at_head %d", ++ "%p in disp: at_head %d", + rq, at_head); + } else { + BUG_ON(!(rq->rq_flags & RQF_GOT)); +@@ -5033,7 +5030,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) + bfqq->dispatched--; + + bfq_log_bfqq(bfqd, bfqq, +- "completed_requests: new disp %d, new rq_in_driver %d", ++ "new disp %d, new rq_in_driver %d", + bfqq->dispatched, bfqd->rq_in_driver); + + if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { +@@ -5061,7 +5058,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) + delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC); + + bfq_log_bfqq(bfqd, bfqq, +- "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", ++ "delta %uus/%luus max_size %u rate %llu/%llu", + delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, + delta_us > 0 ? + (USEC_PER_SEC* +@@ -5129,7 +5126,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) + static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "put_request_body: allocated %d", bfqq->allocated); ++ "allocated %d", bfqq->allocated); + BUG_ON(!bfqq->allocated); + bfqq->allocated--; + +@@ -5406,10 +5403,10 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) + + bfqq->allocated++; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "get_request: new allocated %d", bfqq->allocated); ++ "new allocated %d", bfqq->allocated); + + bfqq->ref++; +- bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", rq, bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "%p: bfqq %p, %d", rq, bfqq, bfqq->ref); + + rq->elv.priv[0] = bic; + rq->elv.priv[1] = bfqq; +@@ -5493,7 +5490,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) + idle_slice_timer); + struct bfq_queue *bfqq = bfqd->in_service_queue; + +- bfq_log(bfqd, "slice_timer expired"); ++ bfq_log(bfqd, "expired"); + + /* + * Theoretical race here: the in-service queue can be NULL or +@@ -5515,10 +5512,10 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; + +- bfq_log(bfqd, "put_async_bfqq: %p", bfqq); ++ bfq_log(bfqd, "%p", bfqq); + if (bfqq) { + bfq_bfqq_move(bfqd, bfqq, root_group); +- bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", ++ bfq_log_bfqq(bfqd, bfqq, "putting %p, %d", + bfqq, bfqq->ref); + bfq_put_queue(bfqq); + *bfqq_ptr = NULL; +@@ -5547,7 +5544,7 @@ static void bfq_exit_queue(struct elevator_queue *e) + struct bfq_data *bfqd = e->elevator_data; + struct bfq_queue *bfqq, *n; + +- bfq_log(bfqd, "exit_queue: starting ..."); ++ bfq_log(bfqd, "starting ..."); + + hrtimer_cancel(&bfqd->idle_slice_timer); + +@@ -5575,7 +5572,7 @@ static void bfq_exit_queue(struct elevator_queue *e) + spin_unlock_irq(&bfqd->lock); + #endif + +- bfq_log(bfqd, "exit_queue: finished ..."); ++ bfq_log(bfqd, "finished ..."); + kfree(bfqd); + } + +diff --git a/block/bfq-mq.h b/block/bfq-mq.h +index 9a5ce1168ff5..e2ae11bf8f76 100644 +--- a/block/bfq-mq.h ++++ b/block/bfq-mq.h +@@ -712,34 +712,34 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); + static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); + + #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ +- pr_crit("%s bfq%d%c %s " fmt "\n", \ ++ pr_crit("%s bfq%d%c %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + (bfqq)->pid, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- bfqq_group(bfqq)->blkg_path, ##args); \ ++ bfqq_group(bfqq)->blkg_path, __func__, ##args); \ + } while (0) + + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ +- pr_crit("%s %s " fmt "\n", \ ++ pr_crit("%s %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ +- bfqg->blkg_path, ##args); \ ++ bfqg->blkg_path, __func__, ##args); \ + } while (0) + + #else /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ +- pr_crit("%s bfq%d%c " fmt "\n", \ ++ pr_crit("%s bfq%d%c [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + (bfqq)->pid, bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- ##args) ++ __func__, ##args) + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) + + #endif /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log(bfqd, fmt, args...) \ +- pr_crit("%s bfq " fmt "\n", \ ++ pr_crit("%s bfq [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ +- ##args) ++ __func__, ##args) + + #else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ + +@@ -762,28 +762,29 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); + static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); + + #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ +- blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s [%s] " fmt, \ + (bfqq)->pid, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- bfqq_group(bfqq)->blkg_path, ##args); \ ++ bfqq_group(bfqq)->blkg_path, __func__, ##args); \ + } while (0) + + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ +- blk_add_trace_msg((bfqd)->queue, "%s " fmt, bfqg->blkg_path, ##args);\ ++ blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, bfqg->blkg_path, \ ++ __func__, ##args);\ + } while (0) + + #else /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ +- blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c [%s] " fmt, (bfqq)->pid, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- ##args) ++ __func__, ##args) + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) + + #endif /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log(bfqd, fmt, args...) \ +- blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) ++ blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args) + + #endif /* CONFIG_BLK_DEV_IO_TRACE */ + #endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ +@@ -938,7 +939,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "entity_service_tree %p %d", ++ "%p %d", + sched_data->service_tree + idx, idx); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + else { +@@ -946,7 +947,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "entity_service_tree %p %d", ++ "%p %d", + sched_data->service_tree + idx, idx); + } + #endif +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 4e6c5232e2fb..ead34c30a7c2 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -119,7 +119,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "update_next_in_service: chose without lookup"); ++ "chose without lookup"); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + else { + struct bfq_group *bfqg = +@@ -127,7 +127,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, + struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data*)bfqg->bfqd, bfqg, +- "update_next_in_service: chose without lookup"); ++ "chose without lookup"); + } + #endif + } +@@ -148,7 +148,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, + bfqq = bfq_entity_to_bfqq(next_in_service); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "update_next_in_service: chosen this queue"); ++ "chosen this queue"); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + else { + struct bfq_group *bfqg = +@@ -156,7 +156,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, + struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "update_next_in_service: chosen this entity"); ++ "chosen this entity"); + } + #endif + return parent_sched_may_change; +@@ -331,10 +331,10 @@ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "calc_finish: serv %lu, w %d", ++ "serv %lu, w %d", + service, entity->weight); + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "calc_finish: start %llu, finish %llu, delta %llu", ++ "start %llu, finish %llu, delta %llu", + start, finish, delta); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + } else { +@@ -342,10 +342,10 @@ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "calc_finish group: serv %lu, w %d", ++ "group: serv %lu, w %d", + service, entity->weight); + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "calc_finish group: start %llu, finish %llu, delta %llu", ++ "group: start %llu, finish %llu, delta %llu", + start, finish, delta); + #endif + } +@@ -484,7 +484,7 @@ static void bfq_update_active_node(struct rb_node *node) + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "update_active_node: new min_start %llu", ++ "new min_start %llu", + ((entity->min_start>>10)*1000)>>12); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + } else { +@@ -492,7 +492,7 @@ static void bfq_update_active_node(struct rb_node *node) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "update_active_node: new min_start %llu", ++ "new min_start %llu", + ((entity->min_start>>10)*1000)>>12); + #endif + } +@@ -620,7 +620,7 @@ static void bfq_get_entity(struct bfq_entity *entity) + + if (bfqq) { + bfqq->ref++; +- bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", + bfqq, bfqq->ref); + } + } +@@ -748,7 +748,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st, + entity->on_st = false; + st->wsum -= entity->weight; + if (bfqq && !is_in_service) { +- bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity (before): %p %d", ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "(before): %p %d", + bfqq, bfqq->ref); + bfq_put_queue(bfqq); + } +@@ -1008,7 +1008,7 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, + tot_serv_to_charge = entity->service; + + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "charge_time: %lu/%u ms, %d/%d/%d sectors", ++ "%lu/%u ms, %d/%d/%d sectors", + time_ms, timeout_ms, entity->service, + tot_serv_to_charge, entity->budget); + +@@ -1080,7 +1080,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "update_fin_time_enqueue: new queue finish %llu", ++ "new queue finish %llu", + ((entity->finish>>10)*1000)>>12); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + } else { +@@ -1088,7 +1088,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "update_fin_time_enqueue: new group finish %llu", ++ "new group finish %llu", + ((entity->finish>>10)*1000)>>12); + #endif + } +@@ -1098,7 +1098,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "update_fin_time_enqueue: queue %seligible in st %p", ++ "queue %seligible in st %p", + entity->start <= st->vtime ? "" : "non ", st); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + } else { +@@ -1106,7 +1106,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "update_fin_time_enqueue: group %seligible in st %p", ++ "group %seligible in st %p", + entity->start <= st->vtime ? "" : "non ", st); + #endif + } +@@ -1550,7 +1550,7 @@ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "calc_vtime_jump: new value %llu", ++ "new value %llu", + ((root_entity->min_start>>10)*1000)>>12); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + else { +@@ -1559,7 +1559,7 @@ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) + entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "calc_vtime_jump: new value %llu", ++ "new value %llu", + ((root_entity->min_start>>10)*1000)>>12); + } + #endif +@@ -1677,7 +1677,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "__lookup_next: start %llu vtime %llu st %p", ++ "start %llu vtime %llu st %p", + ((entity->start>>10)*1000)>>12, + ((new_vtime>>10)*1000)>>12, st); + #ifdef BFQ_GROUP_IOSCHED_ENABLED +@@ -1686,7 +1686,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "__lookup_next: start %llu vtime %llu (%llu) st %p", ++ "start %llu vtime %llu (%llu) st %p", + ((entity->start>>10)*1000)>>12, + ((st->vtime>>10)*1000)>>12, + ((new_vtime>>10)*1000)>>12, st); +@@ -1821,14 +1821,14 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg(bfqd, bfqg, +- "get_next_queue: lookup in this group"); ++ "lookup in this group"); + if (!sd->next_in_service) +- pr_crit("get_next_queue: lookup in this group"); ++ pr_crit("lookup in this group"); + } else { + bfq_log_bfqg(bfqd, bfqd->root_group, +- "get_next_queue: lookup in root group"); ++ "lookup in root group"); + if (!sd->next_in_service) +- pr_crit("get_next_queue: lookup in root group"); ++ pr_crit("lookup in root group"); + } + #endif + +@@ -1903,7 +1903,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, +- "get_next_queue: this queue, finish %llu", ++ "this queue, finish %llu", + (((entity->finish>>10)*1000)>>10)>>2); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + else { +@@ -1911,7 +1911,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg(bfqd, bfqg, +- "get_next_queue: this entity, finish %llu", ++ "this entity, finish %llu", + (((entity->finish>>10)*1000)>>10)>>2); + } + #endif +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index c4df156b1fb4..e49e8ac882b3 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -281,7 +281,7 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd); + static void bfq_schedule_dispatch(struct bfq_data *bfqd) + { + if (bfqd->queued != 0) { +- bfq_log(bfqd, "schedule dispatch"); ++ bfq_log(bfqd, ""); + kblockd_schedule_work(&bfqd->unplug_work); + } + } +@@ -414,7 +414,7 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + if (rb_link) + *rb_link = p; + +- bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", ++ bfq_log(bfqd, "%llu: returning %d", + (unsigned long long) sector, + bfqq ? bfqq->pid : 0); + +@@ -635,7 +635,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq, + if (rq == last || ktime_get_ns() < rq->fifo_time) + return NULL; + +- bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq); + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); + return rq; + } +@@ -728,7 +728,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, + bfq_serv_to_charge(next_rq, bfqq)); + if (entity->budget != new_budget) { + entity->budget = new_budget; +- bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", ++ bfq_log_bfqq(bfqd, bfqq, "new budget %lu", + new_budget); + bfq_requeue_bfqq(bfqd, bfqq, false); + } +@@ -800,8 +800,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, + BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "[%s] bic %p wr_coeff %d start_finish %lu max_time %lu", +- __func__, ++ "bic %p wr_coeff %d start_finish %lu max_time %lu", + bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, + bfqq->wr_cur_max_time); + +@@ -814,11 +813,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, + bfq_wr_duration(bfqd))) { + switch_back_to_interactive_wr(bfqq, bfqd); + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "resume state: switching back to interactive"); ++ "switching back to interactive"); + } else { + bfqq->wr_coeff = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "resume state: switching off wr (%lu + %lu < %lu)", ++ "switching off wr (%lu + %lu < %lu)", + bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, + jiffies); + } +@@ -870,7 +869,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + /* Increment burst size to take into account also bfqq */ + bfqd->burst_size++; + +- bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size); ++ bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size); + + BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh); + +@@ -883,7 +882,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + * other to consider this burst as large. + */ + bfqd->large_burst = true; +- bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started"); ++ bfq_log_bfqq(bfqd, bfqq, "large burst started"); + + /* + * We can now mark all queues in the burst list as +@@ -1055,7 +1054,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfqd->large_burst = false; + bfq_reset_burst_list(bfqd, bfqq); + bfq_log_bfqq(bfqd, bfqq, +- "handle_burst: late activation or different group"); ++ "late activation or different group"); + goto end; + } + +@@ -1065,7 +1064,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) + * bfqq as belonging to this large burst immediately. + */ + if (bfqd->large_burst) { +- bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst"); ++ bfq_log_bfqq(bfqd, bfqq, "marked in burst"); + bfq_mark_bfqq_in_large_burst(bfqq); + goto end; + } +@@ -1572,7 +1571,7 @@ static void bfq_add_request(struct request *rq) + unsigned int old_wr_coeff = bfqq->wr_coeff; + bool interactive = false; + +- bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s", ++ bfq_log_bfqq(bfqd, bfqq, "size %u %s", + blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A"); + + if (bfqq->wr_coeff > 1) /* queue is being weight-raised */ +@@ -1870,10 +1869,10 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) + */ + bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "end_wr: wrais ending at %lu, rais_max_time %u", ++ "wrais ending at %lu, rais_max_time %u", + bfqq->last_wr_start_finish, + jiffies_to_msecs(bfqq->wr_cur_max_time)); +- bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d", ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d", + bfqq->bfqd->wr_busy_queues); + } + +@@ -2048,8 +2047,8 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, + { + if (bfq_too_late_for_merging(new_bfqq)) { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "[%s] too late for bfq%d to be merged", +- __func__, new_bfqq->pid); ++ "too late for bfq%d to be merged", ++ new_bfqq->pid); + return false; + } + +@@ -2258,7 +2257,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, + + } + +- bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", ++ bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d", + bfqd->wr_busy_queues); + + /* +@@ -2359,7 +2358,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd, + bfqq->budget_timeout = jiffies + + bfqd->bfq_timeout * timeout_coeff; + +- bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", ++ bfq_log_bfqq(bfqd, bfqq, "%u", + jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); + } + +@@ -2427,10 +2426,10 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, + + bfq_set_budget_timeout(bfqd, bfqq); + bfq_log_bfqq(bfqd, bfqq, +- "set_in_service_queue, cur-budget = %d", ++ "cur-budget = %d", + bfqq->entity.budget); + } else +- bfq_log(bfqd, "set_in_service_queue: NULL"); ++ bfq_log(bfqd, "NULL"); + + bfqd->in_service_queue = bfqq; + } +@@ -2559,7 +2558,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq + bfqd->peak_rate_samples = 0; /* full re-init on next disp. */ + + bfq_log(bfqd, +- "reset_rate_computation at end, sample %u/%u tot_sects %llu", ++ "at end, sample %u/%u tot_sects %llu", + bfqd->peak_rate_samples, bfqd->sequential_samples, + bfqd->tot_sectors_dispatched); + } +@@ -2579,7 +2578,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || + bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { + bfq_log(bfqd, +- "update_rate_reset: only resetting, delta_first %lluus samples %d", ++ "only resetting, delta_first %lluus samples %d", + bfqd->delta_from_first>>10, bfqd->peak_rate_samples); + goto reset_computation; + } +@@ -2603,7 +2602,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + div_u64(bfqd->delta_from_first, NSEC_PER_USEC)); + + bfq_log(bfqd, +-"update_rate_reset: tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", ++"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", + bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + rate > 20<peak_rate) || + rate > 20<peak_rate_samples, bfqd->sequential_samples, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); + goto reset_computation; + } else { + bfq_log(bfqd, +- "update_rate_reset: do update, samples %u/%u rate/peak %llu/%llu", ++ "do update, samples %u/%u rate/peak %llu/%llu", + bfqd->peak_rate_samples, bfqd->sequential_samples, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); +@@ -2681,7 +2680,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + rate /= divisor; /* smoothing constant alpha = 1/divisor */ + + bfq_log(bfqd, +- "update_rate_reset: divisor %d tmp_peak_rate %llu tmp_rate %u", ++ "divisor %d tmp_peak_rate %llu tmp_rate %u", + divisor, + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), + (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); +@@ -2735,7 +2734,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + + if (bfqd->peak_rate_samples == 0) { /* first dispatch */ + bfq_log(bfqd, +- "update_peak_rate: goto reset, samples %d", ++ "goto reset, samples %d", + bfqd->peak_rate_samples) ; + bfq_reset_rate_computation(bfqd, rq); + goto update_last_values; /* will add one sample */ +@@ -2756,7 +2755,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && + bfqd->rq_in_driver == 0) { + bfq_log(bfqd, +-"update_peak_rate: jumping to updating&resetting delta_last %lluus samples %d", ++"jumping to updating&resetting delta_last %lluus samples %d", + (now_ns - bfqd->last_dispatch)>>10, + bfqd->peak_rate_samples) ; + goto update_rate_and_reset; +@@ -2782,7 +2781,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + bfqd->delta_from_first = now_ns - bfqd->first_dispatch; + + bfq_log(bfqd, +- "update_peak_rate: added samples %u/%u tot_sects %llu delta_first %lluus", ++ "added samples %u/%u tot_sects %llu delta_first %lluus", + bfqd->peak_rate_samples, bfqd->sequential_samples, + bfqd->tot_sectors_dispatched, + bfqd->delta_from_first>>10); +@@ -2798,12 +2797,12 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) + bfqd->last_dispatch = now_ns; + + bfq_log(bfqd, +- "update_peak_rate: delta_first %lluus last_pos %llu peak_rate %llu", ++ "delta_first %lluus last_pos %llu peak_rate %llu", + (now_ns - bfqd->first_dispatch)>>10, + (unsigned long long) bfqd->last_position, + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); + bfq_log(bfqd, +- "update_peak_rate: samples at end %d", bfqd->peak_rate_samples); ++ "samples at end %d", bfqd->peak_rate_samples); + } + + /* +@@ -2900,11 +2899,11 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, + */ + budget = 2 * min_budget; + +- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", ++ bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d", + bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); +- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", ++ bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d", + budget, bfq_min_budget(bfqd)); +- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", ++ bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d", + bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); + + if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { +@@ -3106,7 +3105,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + else /* charge at least one seek */ + *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; + +- bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs); ++ bfq_log(bfqd, "too short %u", delta_usecs); + + return slow; + } +@@ -3129,11 +3128,11 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * peak rate. + */ + slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; +- bfq_log(bfqd, "bfq_bfqq_is_slow: relative rate %d/%d", ++ bfq_log(bfqd, "relative rate %d/%d", + bfqq->entity.service, bfqd->bfq_max_budget); + } + +- bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); ++ bfq_log_bfqq(bfqd, bfqq, "slow %d", slow); + + return slow; + } +@@ -3235,7 +3234,7 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, + struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqd, bfqq, +-"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u", ++"service_blkg %lu soft_rate %u sects/sec interval %u", + bfqq->service_from_backlogged, + bfqd->bfq_wr_max_softrt_rate, + jiffies_to_msecs(HZ * bfqq->service_from_backlogged / +@@ -3414,7 +3413,7 @@ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) + static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "may_budget_timeout: wait_request %d left %d timeout %d", ++ "wait_request %d left %d timeout %d", + bfq_bfqq_wait_request(bfqq), + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, + bfq_bfqq_budget_timeout(bfqq)); +@@ -3675,11 +3674,11 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) + * either boosts the throughput (without issues), or is + * necessary to preserve service guarantees. + */ +- bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", ++ bfq_log_bfqq(bfqd, bfqq, "sync %d idling_boosts_thr %d", + bfq_bfqq_sync(bfqq), idling_boosts_thr); + + bfq_log_bfqq(bfqd, bfqq, +- "may_idle: wr_busy %d boosts %d IO-bound %d guar %d", ++ "wr_busy %d boosts %d IO-bound %d guar %d", + bfqd->wr_busy_queues, + idling_boosts_thr_without_issues, + bfq_bfqq_IO_bound(bfqq), +@@ -3719,7 +3718,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + if (!bfqq) + goto new_queue; + +- bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); ++ bfq_log_bfqq(bfqd, bfqq, "already in-service queue"); + + if (bfq_may_expire_for_budg_timeout(bfqq) && + !hrtimer_active(&bfqd->idle_slice_timer) && +@@ -3797,14 +3796,14 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + new_queue: + bfqq = bfq_set_in_service_queue(bfqd); + if (bfqq) { +- bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); ++ bfq_log_bfqq(bfqd, bfqq, "checking new queue"); + goto check_queue; + } + keep_queue: + if (bfqq) +- bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); ++ bfq_log_bfqq(bfqd, bfqq, "returned this queue"); + else +- bfq_log(bfqd, "select_queue: no queue returned"); ++ bfq_log(bfqd, "no queue returned"); + + return bfqq; + } +@@ -3857,8 +3856,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) + /* see comments on max_service_from_wr */ + bfq_bfqq_end_wr(bfqq); + bfq_log_bfqq(bfqd, bfqq, +- "[%s] too much service", +- __func__); ++ "too much service"); + } + } + /* +@@ -3987,7 +3985,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq; + +- bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); ++ bfq_log(bfqd, "%d busy queues", bfqd->busy_queues); + + if (bfqd->busy_queues == 0) + return 0; +@@ -4021,7 +4019,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) + if (!bfq_dispatch_request(bfqd, bfqq)) + return 0; + +- bfq_log_bfqq(bfqd, bfqq, "dispatched %s request", ++ bfq_log_bfqq(bfqd, bfqq, "%s request", + bfq_bfqq_sync(bfqq) ? "sync" : "async"); + + BUG_ON(bfqq->next_rq == NULL && +@@ -4044,7 +4042,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) + + BUG_ON(bfqq->ref <= 0); + +- bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); + bfqq->ref--; + if (bfqq->ref) + return; +@@ -4086,7 +4084,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) + bfqq->bfqd->burst_size--; + } + +- bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); + #ifdef BFQ_GROUP_IOSCHED_ENABLED +@@ -4120,7 +4118,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfq_schedule_dispatch(bfqd); + } + +- bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref); + + bfq_put_cooperator(bfqq); + +@@ -4200,7 +4198,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, + bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); + bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "set_next_ioprio_data: bic_class %d prio %d class %d", ++ "bic_class %d prio %d class %d", + ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class); + } + +@@ -4227,7 +4225,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) + bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); + bic_set_bfqq(bic, bfqq, false); + bfq_log_bfqq(bfqd, bfqq, +- "check_ioprio_change: bfqq %p %d", ++ "bfqq %p %d", + bfqq, bfqq->ref); + } + +@@ -4362,14 +4360,14 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + * guarantee that this queue is not freed + * until its group goes away. + */ +- bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", ++ bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d", + bfqq, bfqq->ref); + *async_bfqq = bfqq; + } + + out: + bfqq->ref++; /* get a process reference to this queue */ +- bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref); + rcu_read_unlock(); + return bfqq; + } +@@ -4428,7 +4426,7 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, + bic->ttime.ttime_mean > bfqd->bfq_slice_idle)) + has_short_ttime = false; + +- bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", ++ bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d", + has_short_ttime); + + if (has_short_ttime) +@@ -4454,7 +4452,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bfq_update_io_seektime(bfqd, bfqq, rq); + + bfq_log_bfqq(bfqd, bfqq, +- "rq_enqueued: has_short_ttime=%d (seeky %d)", ++ "has_short_ttime=%d (seeky %d)", + bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); + + bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); +@@ -4629,7 +4627,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) + */ + delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC); + +- bfq_log(bfqd, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", ++ bfq_log(bfqd, "delta %uus/%luus max_size %u rate %llu/%llu", + delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, + delta_us > 0 ? + (USEC_PER_SEC* +@@ -4750,7 +4748,7 @@ static void bfq_put_request(struct request *rq) + rq->elv.priv[0] = NULL; + rq->elv.priv[1] = NULL; + +- bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "%p, %d", + bfqq, bfqq->ref); + bfq_put_queue(bfqq); + } +@@ -4816,7 +4814,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, + bic_set_bfqq(bic, bfqq, is_sync); + if (split && is_sync) { + bfq_log_bfqq(bfqd, bfqq, +- "set_request: was_in_list %d " ++ "was_in_list %d " + "was_in_large_burst %d " + "large burst in progress %d", + bic->was_in_burst_list, +@@ -4826,12 +4824,12 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, + if ((bic->was_in_burst_list && bfqd->large_burst) || + bic->saved_in_large_burst) { + bfq_log_bfqq(bfqd, bfqq, +- "set_request: marking in " ++ "marking in " + "large burst"); + bfq_mark_bfqq_in_large_burst(bfqq); + } else { + bfq_log_bfqq(bfqd, bfqq, +- "set_request: clearing in " ++ "clearing in " + "large burst"); + bfq_clear_bfqq_in_large_burst(bfqq); + if (bic->was_in_burst_list) +@@ -4888,7 +4886,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, + + bfqq->allocated[rw]++; + bfqq->ref++; +- bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "bfqq %p, %d", bfqq, bfqq->ref); + + rq->elv.priv[0] = bic; + rq->elv.priv[1] = bfqq; +@@ -4962,7 +4960,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) + * case we just expire a queue too early. + */ + if (bfqq) { +- bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); ++ bfq_log_bfqq(bfqd, bfqq, "expired"); + bfq_clear_bfqq_wait_request(bfqq); + + if (bfq_bfqq_budget_timeout(bfqq)) +@@ -5005,10 +5003,10 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; + +- bfq_log(bfqd, "put_async_bfqq: %p", bfqq); ++ bfq_log(bfqd, "%p", bfqq); + if (bfqq) { + bfq_bfqq_move(bfqd, bfqq, root_group); +- bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", ++ bfq_log_bfqq(bfqd, bfqq, "putting %p, %d", + bfqq, bfqq->ref); + bfq_put_queue(bfqq); + *bfqq_ptr = NULL; +diff --git a/block/bfq.h b/block/bfq.h +index 0cd7a3f251a7..4d2fe7f77af1 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -698,37 +698,37 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); + \ + assert_spin_locked((bfqd)->queue->queue_lock); \ + blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ +- pr_crit("%s bfq%d%c %s " fmt "\n", \ ++ pr_crit("%s bfq%d%c %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + (bfqq)->pid, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- __pbuf, ##args); \ ++ __pbuf, __func__, ##args); \ + } while (0) + + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ + char __pbuf[128]; \ + \ + blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ +- pr_crit("%s %s " fmt "\n", \ ++ pr_crit("%s %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ +- __pbuf, ##args); \ ++ __pbuf, __func__, ##args); \ + } while (0) + + #else /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ +- pr_crit("%s bfq%d%c " fmt "\n", \ ++ pr_crit("%s bfq%d%c [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + (bfqq)->pid, bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- ##args) ++ __func__, ##args) + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) + + #endif /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log(bfqd, fmt, args...) \ +- pr_crit("%s bfq " fmt "\n", \ ++ pr_crit("%s bfq [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ +- ##args) ++ __func__, ##args) + + #else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ + +@@ -755,31 +755,32 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); + \ + assert_spin_locked((bfqd)->queue->queue_lock); \ + blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ +- blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s [%s] " fmt, \ + (bfqq)->pid, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- __pbuf, ##args); \ ++ __pbuf, __func__, ##args); \ + } while (0) + + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ + char __pbuf[128]; \ + \ + blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ +- blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \ ++ blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, __pbuf, \ ++ __func__, ##args); \ + } while (0) + + #else /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ +- blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c [%s] " fmt, (bfqq)->pid, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ +- ##args) ++ __func__, ##args) + #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) + + #endif /* BFQ_GROUP_IOSCHED_ENABLED */ + + #define bfq_log(bfqd, fmt, args...) \ +- blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) ++ blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args) + + #endif /* CONFIG_BLK_DEV_IO_TRACE */ + #endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ +@@ -928,7 +929,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, +- "entity_service_tree %p %d", ++ "%p %d", + sched_data->service_tree + idx, idx); + #ifdef BFQ_GROUP_IOSCHED_ENABLED + else { +@@ -936,7 +937,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, +- "entity_service_tree %p %d", ++ "%p %d", + sched_data->service_tree + idx, idx); + } + #endif + +From 673a457e8a54d1c4b66e61b1a50956ba0b8c6a60 Mon Sep 17 00:00:00 2001 +From: Davide Paganelli +Date: Thu, 8 Feb 2018 11:49:58 +0100 +Subject: [PATCH 19/23] block, bfq-mq, bfq-sq: make bfq_bfqq_expire print + expiration reason + +Improve readability of the log messages related to the expiration +reasons of the function bfq_bfqq_expire. +Change the printing of the number that represents the reason for +expiration with an actual textual description of the reason. + +Signed-off-by: Davide Paganelli +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 10 ++++++++-- + block/bfq-sq-iosched.c | 10 ++++++++-- + 2 files changed, 16 insertions(+), 4 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index edc93b6af186..9268dd47a4e5 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -133,6 +133,12 @@ static const int bfq_timeout = (HZ / 8); + */ + static const unsigned long bfq_merge_time_limit = HZ/10; + ++#define MAX_LENGTH_REASON_NAME 25 ++ ++static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE", ++"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS", ++"PREEMPTED"}; ++ + static struct kmem_cache *bfq_pool; + + /* Below this threshold (in ns), we consider thinktime immediate. */ +@@ -3553,8 +3559,8 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, + } + + bfq_log_bfqq(bfqd, bfqq, +- "expire (%d, slow %d, num_disp %d, short_ttime %d, weight %d)", +- reason, slow, bfqq->dispatched, ++ "expire (%s, slow %d, num_disp %d, short_ttime %d, weight %d)", ++ reason_name[reason], slow, bfqq->dispatched, + bfq_bfqq_has_short_ttime(bfqq), entity->weight); + + /* +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index e49e8ac882b3..f95deaab49a1 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -127,6 +127,12 @@ static const int bfq_timeout = (HZ / 8); + */ + static const unsigned long bfq_merge_time_limit = HZ/10; + ++#define MAX_LENGTH_REASON_NAME 25 ++ ++static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE", ++"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS", ++"PREEMPTED"}; ++ + static struct kmem_cache *bfq_pool; + + /* Below this threshold (in ns), we consider thinktime immediate. */ +@@ -3366,8 +3372,8 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, + } + + bfq_log_bfqq(bfqd, bfqq, +- "expire (%d, slow %d, num_disp %d, short_ttime %d, weight %d)", +- reason, slow, bfqq->dispatched, ++ "expire (%s, slow %d, num_disp %d, short_ttime %d, weight %d)", ++ reason_name[reason], slow, bfqq->dispatched, + bfq_bfqq_has_short_ttime(bfqq), entity->weight); + + /* + +From 62e80623fbb58367c3f667dab22fea0804001f3b Mon Sep 17 00:00:00 2001 +From: Melzani Alessandro +Date: Mon, 26 Feb 2018 22:21:59 +0100 +Subject: [PATCH 20/23] bfq-mq: port of "block, bfq: remove batches of + confusing ifdefs" + +Commit a33801e8b473 ("block, bfq: move debug blkio stats behind +CONFIG_DEBUG_BLK_CGROUP") introduced two batches of confusing ifdefs: +one reported in [1], plus a similar one in another function. This +commit removes both batches, in the way suggested in [1]. + +[1] https://www.spinics.net/lists/linux-block/msg20043.html + +Fixes: a33801e8b473 ("block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP") + +Signed-off-by: Alessandro Melzani +--- + block/bfq-mq-iosched.c | 128 ++++++++++++++++++++++++++++--------------------- + 1 file changed, 73 insertions(+), 55 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 9268dd47a4e5..5a211620f316 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -4256,35 +4256,17 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + return rq; + } + +-static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) +-{ +- struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; +- struct request *rq; +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) +- struct bfq_queue *in_serv_queue, *bfqq; +- bool waiting_rq, idle_timer_disabled; +-#endif + +- spin_lock_irq(&bfqd->lock); +- +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) +- in_serv_queue = bfqd->in_service_queue; +- waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); +- +- rq = __bfq_dispatch_request(hctx); +- +- idle_timer_disabled = +- waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); +- +-#else +- rq = __bfq_dispatch_request(hctx); +-#endif +- spin_unlock_irq(&bfqd->lock); ++#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) ++static void bfq_update_dispatch_stats(struct request_queue *q, ++ struct request *rq, ++ struct bfq_queue *in_serv_queue, ++ bool idle_timer_disabled) ++{ ++ struct bfq_queue *bfqq = rq ? RQ_BFQQ(rq) : NULL; + +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) +- bfqq = rq ? RQ_BFQQ(rq) : NULL; + if (!idle_timer_disabled && !bfqq) +- return rq; ++ return; + + /* + * rq and bfqq are guaranteed to exist until this function +@@ -4299,7 +4281,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. + */ +- spin_lock_irq(hctx->queue->queue_lock); ++ spin_lock_irq(q->queue_lock); + if (idle_timer_disabled) + /* + * Since the idle timer has been disabled, +@@ -4318,8 +4300,35 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) + bfqg_stats_set_start_empty_time(bfqg); + bfqg_stats_update_io_remove(bfqg, rq->cmd_flags); + } +- spin_unlock_irq(hctx->queue->queue_lock); ++ spin_unlock_irq(q->queue_lock); ++} ++#else ++static inline void bfq_update_dispatch_stats(struct request_queue *q, ++ struct request *rq, ++ struct bfq_queue *in_serv_queue, ++ bool idle_timer_disabled) {} + #endif ++static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) ++{ ++ struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; ++ struct request *rq; ++ struct bfq_queue *in_serv_queue; ++ bool waiting_rq, idle_timer_disabled; ++ ++ spin_lock_irq(&bfqd->lock); ++ ++ in_serv_queue = bfqd->in_service_queue; ++ waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); ++ ++ rq = __bfq_dispatch_request(hctx); ++ ++ idle_timer_disabled = ++ waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); ++ ++ spin_unlock_irq(&bfqd->lock); ++ ++ bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, ++ idle_timer_disabled); + + return rq; + } +@@ -4881,6 +4890,38 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) + return idle_timer_disabled; + } + ++#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) ++static void bfq_update_insert_stats(struct request_queue *q, ++ struct bfq_queue *bfqq, ++ bool idle_timer_disabled, ++ unsigned int cmd_flags) ++{ ++ if (!bfqq) ++ return; ++ ++ /* ++ * bfqq still exists, because it can disappear only after ++ * either it is merged with another queue, or the process it ++ * is associated with exits. But both actions must be taken by ++ * the same process currently executing this flow of ++ * instructions. ++ * ++ * In addition, the following queue lock guarantees that ++ * bfqq_group(bfqq) exists as well. ++ */ ++ spin_lock_irq(q->queue_lock); ++ bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); ++ if (idle_timer_disabled) ++ bfqg_stats_update_idle_time(bfqq_group(bfqq)); ++ spin_unlock_irq(q->queue_lock); ++} ++#else ++static inline void bfq_update_insert_stats(struct request_queue *q, ++ struct bfq_queue *bfqq, ++ bool idle_timer_disabled, ++ unsigned int cmd_flags) {} ++#endif ++ + static void bfq_prepare_request(struct request *rq, struct bio *bio); + + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, +@@ -4889,10 +4930,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + struct request_queue *q = hctx->queue; + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq); +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) + bool idle_timer_disabled = false; + unsigned int cmd_flags; +-#endif + + spin_lock_irq(&bfqd->lock); + if (blk_mq_sched_try_insert_merge(q, rq)) { +@@ -4938,7 +4977,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bfqq = RQ_BFQQ(rq); + } + +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) + idle_timer_disabled = __bfq_insert_request(bfqd, rq); + /* + * Update bfqq, because, if a queue merge has occurred +@@ -4946,9 +4984,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + * redirected into a new queue. + */ + bfqq = RQ_BFQQ(rq); +-#else +- __bfq_insert_request(bfqd, rq); +-#endif + + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); +@@ -4956,34 +4991,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + q->last_merge = rq; + } + } +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) ++ + /* + * Cache cmd_flags before releasing scheduler lock, because rq + * may disappear afterwards (for example, because of a request + * merge). + */ + cmd_flags = rq->cmd_flags; +-#endif ++ + spin_unlock_irq(&bfqd->lock); +-#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) +- if (!bfqq) +- return; +- /* +- * bfqq still exists, because it can disappear only after +- * either it is merged with another queue, or the process it +- * is associated with exits. But both actions must be taken by +- * the same process currently executing this flow of +- * instruction. +- * +- * In addition, the following queue lock guarantees that +- * bfqq_group(bfqq) exists as well. +- */ +- spin_lock_irq(q->queue_lock); +- bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); +- if (idle_timer_disabled) +- bfqg_stats_update_idle_time(bfqq_group(bfqq)); +- spin_unlock_irq(q->queue_lock); +-#endif ++ bfq_update_insert_stats(q, bfqq, idle_timer_disabled, ++ cmd_flags); + } + + static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, + +From 0d0d05632872b226f4fae5e56af8736a4c24bf57 Mon Sep 17 00:00:00 2001 +From: Melzani Alessandro +Date: Mon, 26 Feb 2018 22:43:30 +0100 +Subject: [PATCH 21/23] bfq-sq, bfq-mq: port of "bfq: Use icq_to_bic() + consistently" + +Some code uses icq_to_bic() to convert an io_cq pointer to a +bfq_io_cq pointer while other code uses a direct cast. Convert +the code that uses a direct cast such that it uses icq_to_bic(). + +Signed-off-by: Alessandro Melzani +--- + block/bfq-mq-iosched.c | 2 +- + block/bfq-sq-iosched.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 5a211620f316..7b1269558c47 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -272,7 +272,7 @@ static const unsigned long max_service_from_wr = 120000; + #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +-#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) ++#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) + #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) + + /** +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index f95deaab49a1..c4aff8d55fc4 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -266,7 +266,7 @@ static const unsigned long max_service_from_wr = 120000; + #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +-#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) ++#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) + #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) + + static void bfq_schedule_dispatch(struct bfq_data *bfqd); + +From 4cb5de6add7d6ad0d25d73cb95dc871305db1522 Mon Sep 17 00:00:00 2001 +From: Melzani Alessandro +Date: Mon, 26 Feb 2018 22:59:30 +0100 +Subject: [PATCH 22/23] bfq-sq, bfq-mq: port of "block, bfq: fix error handle + in bfq_init" + +if elv_register fail, bfq_pool should be free. + +Signed-off-by: Alessandro Melzani +--- + block/bfq-mq-iosched.c | 4 +++- + block/bfq-sq-iosched.c | 4 +++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 7b1269558c47..964e88c2ce59 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -6129,7 +6129,7 @@ static int __init bfq_init(void) + + ret = elv_register(&iosched_bfq_mq); + if (ret) +- goto err_pol_unreg; ++ goto slab_kill; + + #ifdef BFQ_GROUP_IOSCHED_ENABLED + strcat(msg, " (with cgroups support)"); +@@ -6138,6 +6138,8 @@ static int __init bfq_init(void) + + return 0; + ++slab_kill: ++ bfq_slab_kill(); + err_pol_unreg: + #ifdef BFQ_GROUP_IOSCHED_ENABLED + blkcg_policy_unregister(&blkcg_policy_bfq); +diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c +index c4aff8d55fc4..7f0cf1f01ffc 100644 +--- a/block/bfq-sq-iosched.c ++++ b/block/bfq-sq-iosched.c +@@ -5590,7 +5590,7 @@ static int __init bfq_init(void) + + ret = elv_register(&iosched_bfq); + if (ret) +- goto err_pol_unreg; ++ goto slab_kill; + + #ifdef BFQ_GROUP_IOSCHED_ENABLED + strcat(msg, " (with cgroups support)"); +@@ -5599,6 +5599,8 @@ static int __init bfq_init(void) + + return 0; + ++slab_kill: ++ bfq_slab_kill(); + err_pol_unreg: + #ifdef BFQ_GROUP_IOSCHED_ENABLED + blkcg_policy_unregister(&blkcg_policy_bfq); + +From 1f77c173aaa87ffb22c9f062a6449245d14311e4 Mon Sep 17 00:00:00 2001 +From: Paolo Valente +Date: Wed, 4 Apr 2018 11:28:16 +0200 +Subject: [PATCH 23/23] block, bfq-sq, bfq-mq: lower-bound the estimated peak + rate to 1 + +If a storage device handled by BFQ happens to be slower than 7.5 KB/s +for a certain amount of time (in the order of a second), then the +estimated peak rate of the device, maintained in BFQ, becomes equal to +0. The reason is the limited precision with which the rate is +represented (details on the range of representable values in the +comments introduced by this commit). This leads to a division-by-zero +error where the estimated peak rate is used as divisor. Such a type of +failure has been reported in [1]. + +This commit addresses this issue by: +1. Lower-bounding the estimated peak rate to 1 +2. Adding and improving comments on the range of rates representable + +[1] https://www.spinics.net/lists/kernel/msg2739205.html + +Signed-off-by: Konstantin Khlebnikov +Signed-off-by: Paolo Valente +--- + block/bfq-mq-iosched.c | 25 ++++++++++++++++++++++++- + block/bfq-mq.h | 7 ++++++- + block/bfq-sq-iosched.c | 25 ++++++++++++++++++++++++- + block/bfq.h | 7 ++++++- + 4 files changed, 60 insertions(+), 4 deletions(-) + +diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c +index 964e88c2ce59..03efd90c5d20 100644 +--- a/block/bfq-mq-iosched.c ++++ b/block/bfq-mq-iosched.c +@@ -160,7 +160,20 @@ static struct kmem_cache *bfq_pool; + /* Target observation time interval for a peak-rate update (ns) */ + #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC + +-/* Shift used for peak rate fixed precision calculations. */ ++/* ++ * Shift used for peak-rate fixed precision calculations. ++ * With ++ * - the current shift: 16 positions ++ * - the current type used to store rate: u32 ++ * - the current unit of measure for rate: [sectors/usec], or, more precisely, ++ * [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift, ++ * the range of rates that can be stored is ++ * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec = ++ * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec = ++ * [15, 65G] sectors/sec ++ * Which, assuming a sector size of 512B, corresponds to a range of ++ * [7.5K, 33T] B/sec ++ */ + #define BFQ_RATE_SHIFT 16 + + /* +@@ -2881,6 +2894,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) + BUG_ON(bfqd->peak_rate > 20<peak_rate += rate; ++ ++ /* ++ * For a very slow device, bfqd->peak_rate can reach 0 (see ++ * the minimum representable values reported in the comments ++ * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid ++ * divisions by zero where bfqd->peak_rate is used as a ++ * divisor. ++ */ ++ bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate); ++ + update_thr_responsiveness_params(bfqd); + BUG_ON(bfqd->peak_rate > 20<peak_rate > 20<peak_rate += rate; ++ ++ /* ++ * For a very slow device, bfqd->peak_rate can reach 0 (see ++ * the minimum representable values reported in the comments ++ * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid ++ * divisions by zero where bfqd->peak_rate is used as a ++ * divisor. ++ */ ++ bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate); ++ + update_thr_responsiveness_params(bfqd); + BUG_ON(bfqd->peak_rate > 20<