From 7bd365a925748767d7ed807e5498f90bae0ebc25 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 14 Nov 2017 08:28:45 +0100 Subject: [PATCH 01/23] block, bfq-mq: turn BUG_ON on request-size into WARN_ON BFQ has many checks of internal and external consistency. One of them checks that an I/O request has still sectors to serve, if it happens to be retired without being served. If the request has no sector to serve, a BUG_ON signals the failure and causes the kernel to terminate. Yet, from a crash report by a user [1], this condition may happen to hold, in apparently correct functioning, for I/O with a CD/DVD. To address this issue, this commit turns the above BUG_ON into a WARN_ON. This commit also adds a companion WARN_ON on request insertion into the scheduler. [1] https://groups.google.com/d/msg/bfq-iosched/DDOTJBroBa4/VyU1zUFtCgAJ Reported-by: Alexandre Frade Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 0c09609a6099..0fc757ae7a42 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -1540,6 +1540,8 @@ static void bfq_add_request(struct request *rq) BUG_ON(!RQ_BFQQ(rq)); BUG_ON(RQ_BFQQ(rq) != bfqq); + WARN_ON(blk_rq_sectors(rq) == 0); + elv_rb_add(&bfqq->sort_list, rq); /* @@ -4962,7 +4964,7 @@ static void bfq_finish_request(struct request *rq) rq_io_start_time_ns(rq), rq->cmd_flags); - BUG_ON(blk_rq_sectors(rq) == 0 && !(rq->rq_flags & RQF_STARTED)); + WARN_ON(blk_rq_sectors(rq) == 0 && !(rq->rq_flags & RQF_STARTED)); if (likely(rq->rq_flags & RQF_STARTED)) { unsigned long flags; From 1097d368a20456c88acd75b3184c68df38e8f7b8 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sun, 12 Nov 2017 22:43:46 +0100 Subject: [PATCH 02/23] block, bfq-sq, bfq-mq: consider also past I/O in soft real-time detection BFQ privileges the I/O of soft real-time applications, such as video players, to guarantee to these application a high bandwidth and a low latency. In this respect, it is not easy to correctly detect when an application is soft real-time. A particularly nasty false positive is that of an I/O-bound application that occasionally happens to meet all requirements to be deemed as soft real-time. After being detected as soft real-time, such an application monopolizes the device. Fortunately, BFQ will realize soon that the application is actually not soft real-time and suspend every privilege. Yet, the application may happen again to be wrongly detected as soft real-time, and so on. As highlighted by our tests, this problem causes BFQ to occasionally fail to guarantee a high responsiveness, in the presence of heavy background I/O workloads. The reason is that the background workload happens to be detected as soft real-time, more or less frequently, during the execution of the interactive task under test. To give an idea, because of this problem, Libreoffice Writer occasionally takes 8 seconds, instead of 3, to start up, if there are sequential reads and writes in the background, on a Kingston SSDNow V300. This commit addresses this issue by leveraging the following facts. The reason why some applications are detected as soft real-time despite all BFQ checks to avoid false positives, is simply that, during high CPU or storage-device load, I/O-bound applications may happen to do I/O slowly enough to meet all soft real-time requirements, and pass all BFQ extra checks. 
Yet, this happens only for limited time periods: slow-speed time intervals are usually interspersed between other time intervals during which these applications do I/O at a very high speed. To exploit these facts, this commit introduces a little change, in the detection of soft real-time behavior, to systematically consider also the recent past: the higher the speed was in the recent past, the later next I/O should arrive for the application to be considered as soft real-time. At the beginning of a slow-speed interval, the minimum arrival time allowed for the next I/O usually happens to still be so high, to fall *after* the end of the slow-speed period itself. As a consequence, the application does not risk to be deemed as soft real-time during the slow-speed interval. Then, during the next high-speed interval, the application cannot, evidently, be deemed as soft real-time (exactly because of its speed), and so on. This extra filtering proved to be rather effective: in the above test, the frequency of false positives became so low that the start-up time was 3 seconds in all iterations (apart from occasional outliers, caused by page-cache-management issues, which are out of the scope of this commit, and cannot be solved by an I/O scheduler). Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco --- block/bfq-mq-iosched.c | 115 ++++++++++++++++++++++++++++++++++--------------- block/bfq-sq-iosched.c | 115 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 162 insertions(+), 68 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 0fc757ae7a42..4d06d900f45e 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -3201,37 +3201,78 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, * whereas soft_rt_next_start is set to infinity for applications that do * not. * - * Unfortunately, even a greedy application may happen to behave in an - * isochronous way if the CPU load is high. In fact, the application may - * stop issuing requests while the CPUs are busy serving other processes, - * then restart, then stop again for a while, and so on. In addition, if - * the disk achieves a low enough throughput with the request pattern - * issued by the application (e.g., because the request pattern is random - * and/or the device is slow), then the application may meet the above - * bandwidth requirement too. To prevent such a greedy application to be - * deemed as soft real-time, a further rule is used in the computation of - * soft_rt_next_start: soft_rt_next_start must be higher than the current - * time plus the maximum time for which the arrival of a request is waited - * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. - * This filters out greedy applications, as the latter issue instead their - * next request as soon as possible after the last one has been completed - * (in contrast, when a batch of requests is completed, a soft real-time - * application spends some time processing data). + * Unfortunately, even a greedy (i.e., I/O-bound) application may + * happen to meet, occasionally or systematically, both the above + * bandwidth and isochrony requirements. This may happen at least in + * the following circumstances. First, if the CPU load is high. The + * application may stop issuing requests while the CPUs are busy + * serving other processes, then restart, then stop again for a while, + * and so on. 
The other circumstances are related to the storage + * device: the storage device is highly loaded or reaches a low-enough + * throughput with the I/O of the application (e.g., because the I/O + * is random and/or the device is slow). In all these cases, the + * I/O of the application may be simply slowed down enough to meet + * the bandwidth and isochrony requirements. To reduce the probability + * that greedy applications are deemed as soft real-time in these + * corner cases, a further rule is used in the computation of + * soft_rt_next_start: the return value of this function is forced to + * be higher than the maximum between the following two quantities. * - * Unfortunately, the last filter may easily generate false positives if - * only bfqd->bfq_slice_idle is used as a reference time interval and one - * or both the following cases occur: - * 1) HZ is so low that the duration of a jiffy is comparable to or higher - * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with - * HZ=100. + * (a) Current time plus: (1) the maximum time for which the arrival + * of a request is waited for when a sync queue becomes idle, + * namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We + * postpone for a moment the reason for adding a few extra + * jiffies; we get back to it after next item (b). Lower-bounding + * the return value of this function with the current time plus + * bfqd->bfq_slice_idle tends to filter out greedy applications, + * because the latter issue their next request as soon as possible + * after the last one has been completed. In contrast, a soft + * real-time application spends some time processing data, after a + * batch of its requests has been completed. + * + * (b) Current value of bfqq->soft_rt_next_start. As pointed out + * above, greedy applications may happen to meet both the + * bandwidth and isochrony requirements under heavy CPU or + * storage-device load. In more detail, in these scenarios, these + * applications happen, only for limited time periods, to do I/O + * slowly enough to meet all the requirements described so far, + * including the filtering in above item (a). These slow-speed + * time intervals are usually interspersed between other time + * intervals during which these applications do I/O at a very high + * speed. Fortunately, exactly because of the high speed of the + * I/O in the high-speed intervals, the values returned by this + * function happen to be so high, near the end of any such + * high-speed interval, to be likely to fall *after* the end of + * the low-speed time interval that follows. These high values are + * stored in bfqq->soft_rt_next_start after each invocation of + * this function. As a consequence, if the last value of + * bfqq->soft_rt_next_start is constantly used to lower-bound the + * next value that this function may return, then, from the very + * beginning of a low-speed interval, bfqq->soft_rt_next_start is + * likely to be constantly kept so high that any I/O request + * issued during the low-speed interval is considered as arriving + * to soon for the application to be deemed as soft + * real-time. Then, in the high-speed interval that follows, the + * application will not be deemed as soft real-time, just because + * it will do I/O at a high speed. And so on. 
+ * + * Getting back to the filtering in item (a), in the following two + * cases this filtering might be easily passed by a greedy + * application, if the reference quantity was just + * bfqd->bfq_slice_idle: + * 1) HZ is so low that the duration of a jiffy is comparable to or + * higher than bfqd->bfq_slice_idle. This happens, e.g., on slow + * devices with HZ=100. The time granularity may be so coarse + * that the approximation, in jiffies, of bfqd->bfq_slice_idle + * is rather lower than the exact value. * 2) jiffies, instead of increasing at a constant rate, may stop increasing * for a while, then suddenly 'jump' by several units to recover the lost * increments. This seems to happen, e.g., inside virtual machines. - * To address this issue, we do not use as a reference time interval just - * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In - * particular we add the minimum number of jiffies for which the filter - * seems to be quite precise also in embedded systems and KVM/QEMU virtual - * machines. + * To address this issue, in the filtering in (a) we do not use as a + * reference time interval just bfqd->bfq_slice_idle, but + * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the + * minimum number of jiffies for which the filter seems to be quite + * precise also in embedded systems and KVM/QEMU virtual machines. */ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -3243,10 +3284,11 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies_to_msecs(HZ * bfqq->service_from_backlogged / bfqd->bfq_wr_max_softrt_rate)); - return max(bfqq->last_idle_bklogged + - HZ * bfqq->service_from_backlogged / - bfqd->bfq_wr_max_softrt_rate, - jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); + return max3(bfqq->soft_rt_next_start, + bfqq->last_idle_bklogged + + HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate, + jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } /** @@ -4395,10 +4437,15 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqq->split_time = bfq_smallest_from_now(); /* - * Set to the value for which bfqq will not be deemed as - * soft rt when it becomes backlogged. + * To not forget the possibly high bandwidth consumed by a + * process/queue in the recent past, + * bfq_bfqq_softrt_next_start() returns a value at least equal + * to the current value of bfqq->soft_rt_next_start (see + * comments on bfq_bfqq_softrt_next_start). Set + * soft_rt_next_start to now, to mean that bfqq has consumed + * no bandwidth so far. */ - bfqq->soft_rt_next_start = bfq_greatest_from_now(); + bfqq->soft_rt_next_start = jiffies; /* first request is almost certainly seeky */ bfqq->seek_history = 1; diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index 4bbd7f4c0154..987dc255c82c 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -3089,37 +3089,78 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, * whereas soft_rt_next_start is set to infinity for applications that do * not. * - * Unfortunately, even a greedy application may happen to behave in an - * isochronous way if the CPU load is high. In fact, the application may - * stop issuing requests while the CPUs are busy serving other processes, - * then restart, then stop again for a while, and so on. 
In addition, if - * the disk achieves a low enough throughput with the request pattern - * issued by the application (e.g., because the request pattern is random - * and/or the device is slow), then the application may meet the above - * bandwidth requirement too. To prevent such a greedy application to be - * deemed as soft real-time, a further rule is used in the computation of - * soft_rt_next_start: soft_rt_next_start must be higher than the current - * time plus the maximum time for which the arrival of a request is waited - * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. - * This filters out greedy applications, as the latter issue instead their - * next request as soon as possible after the last one has been completed - * (in contrast, when a batch of requests is completed, a soft real-time - * application spends some time processing data). + * Unfortunately, even a greedy (i.e., I/O-bound) application may + * happen to meet, occasionally or systematically, both the above + * bandwidth and isochrony requirements. This may happen at least in + * the following circumstances. First, if the CPU load is high. The + * application may stop issuing requests while the CPUs are busy + * serving other processes, then restart, then stop again for a while, + * and so on. The other circumstances are related to the storage + * device: the storage device is highly loaded or reaches a low-enough + * throughput with the I/O of the application (e.g., because the I/O + * is random and/or the device is slow). In all these cases, the + * I/O of the application may be simply slowed down enough to meet + * the bandwidth and isochrony requirements. To reduce the probability + * that greedy applications are deemed as soft real-time in these + * corner cases, a further rule is used in the computation of + * soft_rt_next_start: the return value of this function is forced to + * be higher than the maximum between the following two quantities. * - * Unfortunately, the last filter may easily generate false positives if - * only bfqd->bfq_slice_idle is used as a reference time interval and one - * or both the following cases occur: - * 1) HZ is so low that the duration of a jiffy is comparable to or higher - * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with - * HZ=100. + * (a) Current time plus: (1) the maximum time for which the arrival + * of a request is waited for when a sync queue becomes idle, + * namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We + * postpone for a moment the reason for adding a few extra + * jiffies; we get back to it after next item (b). Lower-bounding + * the return value of this function with the current time plus + * bfqd->bfq_slice_idle tends to filter out greedy applications, + * because the latter issue their next request as soon as possible + * after the last one has been completed. In contrast, a soft + * real-time application spends some time processing data, after a + * batch of its requests has been completed. + * + * (b) Current value of bfqq->soft_rt_next_start. As pointed out + * above, greedy applications may happen to meet both the + * bandwidth and isochrony requirements under heavy CPU or + * storage-device load. In more detail, in these scenarios, these + * applications happen, only for limited time periods, to do I/O + * slowly enough to meet all the requirements described so far, + * including the filtering in above item (a). 
These slow-speed + * time intervals are usually interspersed between other time + * intervals during which these applications do I/O at a very high + * speed. Fortunately, exactly because of the high speed of the + * I/O in the high-speed intervals, the values returned by this + * function happen to be so high, near the end of any such + * high-speed interval, to be likely to fall *after* the end of + * the low-speed time interval that follows. These high values are + * stored in bfqq->soft_rt_next_start after each invocation of + * this function. As a consequence, if the last value of + * bfqq->soft_rt_next_start is constantly used to lower-bound the + * next value that this function may return, then, from the very + * beginning of a low-speed interval, bfqq->soft_rt_next_start is + * likely to be constantly kept so high that any I/O request + * issued during the low-speed interval is considered as arriving + * to soon for the application to be deemed as soft + * real-time. Then, in the high-speed interval that follows, the + * application will not be deemed as soft real-time, just because + * it will do I/O at a high speed. And so on. + * + * Getting back to the filtering in item (a), in the following two + * cases this filtering might be easily passed by a greedy + * application, if the reference quantity was just + * bfqd->bfq_slice_idle: + * 1) HZ is so low that the duration of a jiffy is comparable to or + * higher than bfqd->bfq_slice_idle. This happens, e.g., on slow + * devices with HZ=100. The time granularity may be so coarse + * that the approximation, in jiffies, of bfqd->bfq_slice_idle + * is rather lower than the exact value. * 2) jiffies, instead of increasing at a constant rate, may stop increasing * for a while, then suddenly 'jump' by several units to recover the lost * increments. This seems to happen, e.g., inside virtual machines. - * To address this issue, we do not use as a reference time interval just - * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In - * particular we add the minimum number of jiffies for which the filter - * seems to be quite precise also in embedded systems and KVM/QEMU virtual - * machines. + * To address this issue, in the filtering in (a) we do not use as a + * reference time interval just bfqd->bfq_slice_idle, but + * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the + * minimum number of jiffies for which the filter seems to be quite + * precise also in embedded systems and KVM/QEMU virtual machines. */ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -3131,10 +3172,11 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies_to_msecs(HZ * bfqq->service_from_backlogged / bfqd->bfq_wr_max_softrt_rate)); - return max(bfqq->last_idle_bklogged + - HZ * bfqq->service_from_backlogged / - bfqd->bfq_wr_max_softrt_rate, - jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); + return max3(bfqq->soft_rt_next_start, + bfqq->last_idle_bklogged + + HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate, + jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } /** @@ -4167,10 +4209,15 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqq->split_time = bfq_smallest_from_now(); /* - * Set to the value for which bfqq will not be deemed as - * soft rt when it becomes backlogged. 
+ * To not forget the possibly high bandwidth consumed by a + * process/queue in the recent past, + * bfq_bfqq_softrt_next_start() returns a value at least equal + * to the current value of bfqq->soft_rt_next_start (see + * comments on bfq_bfqq_softrt_next_start). Set + * soft_rt_next_start to now, to mean that bfqq has consumed + * no bandwidth so far. */ - bfqq->soft_rt_next_start = bfq_greatest_from_now(); + bfqq->soft_rt_next_start = jiffies; /* first request is almost certainly seeky */ bfqq->seek_history = 1; From 2a09b505660c81dbb80a5d68c9bc558c326d041f Mon Sep 17 00:00:00 2001 From: Chiara Bruschi Date: Thu, 7 Dec 2017 09:57:19 +0100 Subject: [PATCH 03/23] block, bfq-mq: fix occurrences of request prepare/finish methods' old names Commits 'b01f1fa3bb19' (Port of "blk-mq-sched: unify request prepare methods") and 'cc10d2d7d2c1' (Port of "blk-mq-sched: unify request finished methods") changed the old names of current bfq_prepare_request and bfq_finish_request methods, but left them unchanged elsewhere in the code (related comments, part of function name bfq_put_rq_priv_body). This commit fixes every occurrence of the old names of these methods by changing them into the current names. Fixes: b01f1fa3bb19 (Port of "blk-mq-sched: unify request prepare methods") Fixes: cc10d2d7d2c1 (Port of "blk-mq-sched: unify request finished methods") Reviewed-by: Paolo Valente Signed-off-by: Federico Motta Signed-off-by: Chiara Bruschi --- block/bfq-mq-iosched.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 4d06d900f45e..8f8d5eccb016 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -4018,20 +4018,20 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) /* * TESTING: reset DISP_LIST flag, because: 1) * this rq this request has passed through - * get_rq_private, 2) then it will have - * put_rq_private invoked on it, and 3) in - * put_rq_private we use this flag to check - * that put_rq_private is not invoked on - * requests for which get_rq_private has been - * invoked. + * bfq_prepare_request, 2) then it will have + * bfq_finish_request invoked on it, and 3) in + * bfq_finish_request we use this flag to check + * that bfq_finish_request is not invoked on + * requests for which bfq_prepare_request has + * been invoked. */ rq->rq_flags &= ~RQF_DISP_LIST; goto inc_in_driver_start_rq; } /* - * We exploit the put_rq_private hook to decrement - * rq_in_driver, but put_rq_private will not be + * We exploit the bfq_finish_request hook to decrement + * rq_in_driver, but bfq_finish_request will not be * invoked on this request. So, to avoid unbalance, * just start this request, without incrementing * rq_in_driver. As a negative consequence, @@ -4040,14 +4040,14 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * bfq_schedule_dispatch to be invoked uselessly. * * As for implementing an exact solution, the - * put_request hook, if defined, is probably invoked - * also on this request. So, by exploiting this hook, - * we could 1) increment rq_in_driver here, and 2) - * decrement it in put_request. Such a solution would - * let the value of the counter be always accurate, - * but it would entail using an extra interface - * function. This cost seems higher than the benefit, - * being the frequency of non-elevator-private + * bfq_finish_request hook, if defined, is probably + * invoked also on this request. 
So, by exploiting + * this hook, we could 1) increment rq_in_driver here, + * and 2) decrement it in bfq_finish_request. Such a + * solution would let the value of the counter be + * always accurate, but it would entail using an extra + * interface function. This cost seems higher than the + * benefit, being the frequency of non-elevator-private * requests very low. */ goto start_rq; @@ -4963,7 +4963,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) } } -static void bfq_put_rq_priv_body(struct bfq_queue *bfqq) +static void bfq_finish_request_body(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request_body: allocated %d", bfqq->allocated); @@ -5019,7 +5019,7 @@ static void bfq_finish_request(struct request *rq) spin_lock_irqsave(&bfqd->lock, flags); bfq_completed_request(bfqq, bfqd); - bfq_put_rq_priv_body(bfqq); + bfq_finish_request_body(bfqq); spin_unlock_irqrestore(&bfqd->lock, flags); } else { @@ -5042,7 +5042,7 @@ static void bfq_finish_request(struct request *rq) bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); } - bfq_put_rq_priv_body(bfqq); + bfq_finish_request_body(bfqq); } rq->elv.priv[0] = NULL; From 4df19943c3a767df453abea3d2ac3433c3326ce0 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 16 Nov 2017 18:38:13 +0100 Subject: [PATCH 04/23] block, bfq-sq, bfq-mq: add missing rq_pos_tree update on rq removal If two processes do I/O close to each other, then BFQ merges the bfq_queues associated with these processes, to get a more sequential I/O, and thus a higher throughput. In this respect, to detect whether two processes are doing I/O close to each other, BFQ keeps a list of the head-of-line I/O requests of all active bfq_queues. The list is ordered by initial sectors, and implemented through a red-black tree (rq_pos_tree). Unfortunately, the update of the rq_pos_tree was incomplete, because the tree was not updated on the removal of the head-of-line I/O request of a bfq_queue, in case the queue did not remain empty. This commit adds the missing update. Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco --- block/bfq-mq-iosched.c | 3 +++ block/bfq-sq-iosched.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 8f8d5eccb016..603191c9008f 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -1729,6 +1729,9 @@ static void bfq_remove_request(struct request_queue *q, rb_erase(&bfqq->pos_node, bfqq->pos_root); bfqq->pos_root = NULL; } + } else { + BUG_ON(!bfqq->next_rq); + bfq_pos_tree_add_move(bfqd, bfqq); } if (rq->cmd_flags & REQ_META) { diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index 987dc255c82c..ea90ace79e49 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -1669,6 +1669,9 @@ static void bfq_remove_request(struct request *rq) rb_erase(&bfqq->pos_node, bfqq->pos_root); bfqq->pos_root = NULL; } + } else { + BUG_ON(!bfqq->next_rq); + bfq_pos_tree_add_move(bfqd, bfqq); } if (rq->cmd_flags & REQ_META) { From b844e345140aaea957d84a21d2aa67588b020cd5 Mon Sep 17 00:00:00 2001 From: Angelo Ruocco Date: Mon, 18 Dec 2017 08:28:08 +0100 Subject: [PATCH 05/23] block, bfq-sq, bfq-mq: check low_latency flag in bfq_bfqq_save_state() A just-created bfq_queue will certainly be deemed as interactive on the arrival of its first I/O request, if the low_latency flag is set. Yet, if the queue is merged with another queue on the arrival of its first I/O request, it will not have the chance to be flagged as interactive. 
Nevertheless, if the queue is then split soon enough, it has to be flagged as interactive after the split. To handle this early-merge scenario correctly, BFQ saves the state of the queue, on the merge, as if the latter had already been deemed interactive. So, if the queue is split soon, it will get weight-raised, because the previous state of the queue is resumed on the split. Unfortunately, in the act of saving the state of the newly-created queue, BFQ doesn't check whether the low_latency flag is set, and this causes early-merged queues to be then weight-raised, on queue splits, even if low_latency is off. This commit addresses this problem by adding the missing check. Signed-off-by: Angelo Ruocco Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 3 ++- block/bfq-sq-iosched.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 603191c9008f..ff9776c8836a 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -2231,7 +2231,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); if (unlikely(bfq_bfqq_just_created(bfqq) && - !bfq_bfqq_in_large_burst(bfqq))) { + !bfq_bfqq_in_large_burst(bfqq) && + bfqq->bfqd->low_latency)) { /* * bfqq being merged ritgh after being created: bfqq * would have deserved interactive weight raising, but diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index ea90ace79e49..3a2d764e760c 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -2109,7 +2109,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); if (unlikely(bfq_bfqq_just_created(bfqq) && - !bfq_bfqq_in_large_burst(bfqq))) { + !bfq_bfqq_in_large_burst(bfqq) && + bfqq->bfqd->low_latency)) { /* * bfqq being merged ritgh after being created: bfqq * would have deserved interactive weight raising, but From 4cc6896fe1de2e0b4de151a6e70658f10b9ec2fa Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 27 Oct 2017 11:12:14 +0200 Subject: [PATCH 06/23] block, bfq-sq, bfq-mq: let a queue be merged only shortly after starting I/O In BFQ and CFQ, two processes are said to be cooperating if they do I/O in such a way that the union of their I/O requests yields a sequential I/O pattern. To get such a sequential I/O pattern out of the non-sequential pattern of each cooperating process, BFQ and CFQ merge the queues associated with these processes. In more detail, cooperating processes, and thus their associated queues, usually start, or restart, to do I/O shortly after each other. This is the case, e.g., for the I/O threads of KVM/QEMU and of the dump utility. Basing on this assumption, this commit allows a bfq_queue to be merged only during a short time interval (100ms) after it starts, or re-starts, to do I/O. This filtering provides two important benefits. First, it greatly reduces the probability that two non-cooperating processes have their queues merged by mistake, if they just happen to do I/O close to each other for a short time interval. These spurious merges cause loss of service guarantees. A low-weight bfq_queue may unjustly get more than its expected share of the throughput: if such a low-weight queue is merged with a high-weight queue, then the I/O for the low-weight queue is served as if the queue had a high weight. 
This may damage other high-weight queues unexpectedly. For instance, because of this issue, lxterminal occasionally took 7.5 seconds to start, instead of 6.5 seconds, when some sequential readers and writers did I/O in the background on a FUJITSU MHX2300BT HDD. The reason is that the bfq_queues associated with some of the readers or the writers were merged with the high-weight queues of some processes that had to do some urgent but little I/O. The readers then exploited the inherited high weight for all or most of their I/O, during the start-up of terminal. The filtering introduced by this commit eliminated any outlier caused by spurious queue merges in our start-up time tests. This filtering also provides a little boost of the throughput sustainable by BFQ: 3-4%, depending on the CPU. The reason is that, once a bfq_queue cannot be merged any longer, this commit makes BFQ stop updating the data needed to handle merging for the queue. Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco --- block/bfq-mq-iosched.c | 64 +++++++++++++++++++++++++++++++++++++++++--------- block/bfq-mq.h | 1 + block/bfq-sched.c | 4 ++++ block/bfq-sq-iosched.c | 64 +++++++++++++++++++++++++++++++++++++++++--------- block/bfq.h | 2 ++ 5 files changed, 113 insertions(+), 22 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index ff9776c8836a..8b17b25a3c30 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -119,6 +119,20 @@ static const int bfq_async_charge_factor = 10; /* Default timeout values, in jiffies, approximating CFQ defaults. */ static const int bfq_timeout = (HZ / 8); +/* + * Time limit for merging (see comments in bfq_setup_cooperator). Set + * to the slowest value that, in our tests, proved to be effective in + * removing false positives, while not causing true positives to miss + * queue merging. + * + * As can be deduced from the low time limit below, queue merging, if + * successful, happens at the very beggining of the I/O of the involved + * cooperating processes, as a consequence of the arrival of the very + * first requests from each cooperator. After that, there is very + * little chance to find cooperators. + */ +static const unsigned long bfq_merge_time_limit = HZ/10; + static struct kmem_cache *bfq_pool; /* Below this threshold (in ns), we consider thinktime immediate. */ @@ -389,6 +403,13 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, return bfqq; } +static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) +{ + return bfqq->service_from_backlogged > 0 && + time_is_before_jiffies(bfqq->first_IO_time + + bfq_merge_time_limit); +} + static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct rb_node **p, *parent; @@ -399,6 +420,14 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->pos_root = NULL; } + /* + * bfqq cannot be merged any longer (see comments in + * bfq_setup_cooperator): no point in adding bfqq into the + * position tree. 
+ */ + if (bfq_too_late_for_merging(bfqq)) + return; + if (bfq_class_idle(bfqq)) return; if (!bfqq->next_rq) @@ -2081,6 +2110,13 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) { + if (bfq_too_late_for_merging(new_bfqq)) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "[%s] too late for bfq%d to be merged", + __func__, new_bfqq->pid); + return false; + } + if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || (bfqq->ioprio_class != new_bfqq->ioprio_class)) return false; @@ -2149,6 +2185,23 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* + * Prevent bfqq from being merged if it has been created too + * long ago. The idea is that true cooperating processes, and + * thus their associated bfq_queues, are supposed to be + * created shortly after each other. This is the case, e.g., + * for KVM/QEMU and dump I/O threads. Basing on this + * assumption, the following filtering greatly reduces the + * probability that two non-cooperating processes, which just + * happen to do close I/O for some short time interval, have + * their queues merged by mistake. + */ + if (bfq_too_late_for_merging(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, + "would have looked for coop, but too late"); + return NULL; + } + if (bfqq->new_bfqq) return bfqq->new_bfqq; @@ -3338,17 +3391,6 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, */ slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); - /* - * Increase service_from_backlogged before next statement, - * because the possible next invocation of - * bfq_bfqq_charge_time would likely inflate - * entity->service. In contrast, service_from_backlogged must - * contain real service, to enable the soft real-time - * heuristic to correctly compute the bandwidth consumed by - * bfqq. - */ - bfqq->service_from_backlogged += entity->service; - /* * As above explained, charge slow (typically seeky) and * timed-out queues with the time and not the service diff --git a/block/bfq-mq.h b/block/bfq-mq.h index 1cb05bb853d2..a5947b203ef2 100644 --- a/block/bfq-mq.h +++ b/block/bfq-mq.h @@ -337,6 +337,7 @@ struct bfq_queue { unsigned long wr_start_at_switch_to_srt; unsigned long split_time; /* time of last split */ + unsigned long first_IO_time; /* time of first I/O for this queue */ }; /** diff --git a/block/bfq-sched.c b/block/bfq-sched.c index 616c0692335a..9d261dd428e4 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c @@ -939,6 +939,10 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) struct bfq_entity *entity = &bfqq->entity; struct bfq_service_tree *st; + if (!bfqq->service_from_backlogged) + bfqq->first_IO_time = jiffies; + + bfqq->service_from_backlogged += served; for_each_entity(entity) { st = bfq_entity_service_tree(entity); diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index 3a2d764e760c..cd00a41ca35d 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -113,6 +113,20 @@ static const int bfq_async_charge_factor = 10; /* Default timeout values, in jiffies, approximating CFQ defaults. */ static const int bfq_timeout = (HZ / 8); +/* + * Time limit for merging (see comments in bfq_setup_cooperator). Set + * to the slowest value that, in our tests, proved to be effective in + * removing false positives, while not causing true positives to miss + * queue merging. 
+ * + * As can be deduced from the low time limit below, queue merging, if + * successful, happens at the very beggining of the I/O of the involved + * cooperating processes, as a consequence of the arrival of the very + * first requests from each cooperator. After that, there is very + * little chance to find cooperators. + */ +static const unsigned long bfq_merge_time_limit = HZ/10; + static struct kmem_cache *bfq_pool; /* Below this threshold (in ns), we consider thinktime immediate. */ @@ -351,6 +365,13 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, return bfqq; } +static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) +{ + return bfqq->service_from_backlogged > 0 && + time_is_before_jiffies(bfqq->first_IO_time + + bfq_merge_time_limit); +} + static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct rb_node **p, *parent; @@ -361,6 +382,14 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->pos_root = NULL; } + /* + * bfqq cannot be merged any longer (see comments in + * bfq_setup_cooperator): no point in adding bfqq into the + * position tree. + */ + if (bfq_too_late_for_merging(bfqq)) + return; + if (bfq_class_idle(bfqq)) return; if (!bfqq->next_rq) @@ -1960,6 +1989,13 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) { + if (bfq_too_late_for_merging(new_bfqq)) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "[%s] too late for bfq%d to be merged", + __func__, new_bfqq->pid); + return false; + } + if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || (bfqq->ioprio_class != new_bfqq->ioprio_class)) return false; @@ -2028,6 +2064,23 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* + * Prevent bfqq from being merged if it has been created too + * long ago. The idea is that true cooperating processes, and + * thus their associated bfq_queues, are supposed to be + * created shortly after each other. This is the case, e.g., + * for KVM/QEMU and dump I/O threads. Basing on this + * assumption, the following filtering greatly reduces the + * probability that two non-cooperating processes, which just + * happen to do close I/O for some short time interval, have + * their queues merged by mistake. + */ + if (bfq_too_late_for_merging(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, + "would have looked for coop, but too late"); + return NULL; + } + if (bfqq->new_bfqq) return bfqq->new_bfqq; @@ -3226,17 +3279,6 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, */ slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); - /* - * Increase service_from_backlogged before next statement, - * because the possible next invocation of - * bfq_bfqq_charge_time would likely inflate - * entity->service. In contrast, service_from_backlogged must - * contain real service, to enable the soft real-time - * heuristic to correctly compute the bandwidth consumed by - * bfqq. 
- */ - bfqq->service_from_backlogged += entity->service; - /* * As above explained, charge slow (typically seeky) and * timed-out queues with the time and not the service diff --git a/block/bfq.h b/block/bfq.h index 47cd4d5a8c32..59539adc00a5 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -329,6 +329,8 @@ struct bfq_queue { unsigned long wr_start_at_switch_to_srt; unsigned long split_time; /* time of last split */ + + unsigned long first_IO_time; /* time of first I/O for this queue */ }; /** From 157f39c43ab182280634cd4f6335d0187b3741a0 Mon Sep 17 00:00:00 2001 From: Angelo Ruocco Date: Mon, 11 Dec 2017 14:19:54 +0100 Subject: [PATCH 07/23] block, bfq-sq, bfq-mq: remove superfluous check in queue-merging setup When two or more processes do I/O in a way that the their requests are sequential in respect to one another, BFQ merges the bfq_queues associated with the processes. This way the overall I/O pattern becomes sequential, and thus there is a boost in througput. These cooperating processes usually start or restart to do I/O shortly after each other. So, in order to avoid merging non-cooperating processes, BFQ ensures that none of these queues has been in weight raising for too long. In this respect, from commit "block, bfq-sq, bfq-mq: let a queue be merged only shortly after being created", BFQ checks whether any queue (and not only weight-raised ones) is doing I/O continuously from too long to be merged. This new additional check makes the first one useless: a queue doing I/O from long enough, if being weight-raised, is also a queue in weight raising for too long to be merged. Accordingly, this commit removes the first check. Signed-off-by: Angelo Ruocco Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 53 ++++---------------------------------------------- block/bfq-sq-iosched.c | 53 ++++---------------------------------------------- 2 files changed, 8 insertions(+), 98 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 8b17b25a3c30..f5db8613a70f 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -2140,20 +2140,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, return true; } -/* - * If this function returns true, then bfqq cannot be merged. The idea - * is that true cooperation happens very early after processes start - * to do I/O. Usually, late cooperations are just accidental false - * positives. In case bfqq is weight-raised, such false positives - * would evidently degrade latency guarantees for bfqq. - */ -static bool wr_from_too_long(struct bfq_queue *bfqq) -{ - return bfqq->wr_coeff > 1 && - time_is_before_jiffies(bfqq->last_wr_start_finish + - msecs_to_jiffies(100)); -} - /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return @@ -2167,11 +2153,6 @@ static bool wr_from_too_long(struct bfq_queue *bfqq) * to maintain. Besides, in such a critical condition as an out of memory, * the benefits of queue merging may be little relevant, or even negligible. * - * Weight-raised queues can be merged only if their weight-raising - * period has just started. In fact cooperating processes are usually - * started together. Thus, with this filter we avoid false positives - * that would jeopardize low-latency guarantees. 
- * * WARNING: queue merging may impair fairness among non-weight raised * queues, for at least two reasons: 1) the original weight of a * merged queue may change during the merged state, 2) even being the @@ -2205,15 +2186,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->new_bfqq) return bfqq->new_bfqq; - if (io_struct && wr_from_too_long(bfqq) && - likely(bfqq != &bfqd->oom_bfqq)) - bfq_log_bfqq(bfqd, bfqq, - "would have looked for coop, but bfq%d wr", - bfqq->pid); - - if (!io_struct || - wr_from_too_long(bfqq) || - unlikely(bfqq == &bfqd->oom_bfqq)) + if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; /* If there is only one backlogged queue, don't search. */ @@ -2223,17 +2196,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, in_service_bfqq = bfqd->in_service_queue; if (in_service_bfqq && in_service_bfqq != bfqq && - wr_from_too_long(in_service_bfqq) - && likely(in_service_bfqq == &bfqd->oom_bfqq)) - bfq_log_bfqq(bfqd, bfqq, - "would have tried merge with in-service-queue, but wr"); - - if (!in_service_bfqq || in_service_bfqq == bfqq - || wr_from_too_long(in_service_bfqq) || - unlikely(in_service_bfqq == &bfqd->oom_bfqq)) - goto check_scheduled; - - if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + likely(in_service_bfqq != &bfqd->oom_bfqq) && + bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && bfqq->entity.parent == in_service_bfqq->entity.parent && bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) { new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); @@ -2245,21 +2209,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, * queues. The only thing we need is that the bio/request is not * NULL, as we need it to establish whether a cooperator exists. */ -check_scheduled: new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, bfq_io_struct_pos(io_struct, request)); BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); - if (new_bfqq && wr_from_too_long(new_bfqq) && - likely(new_bfqq != &bfqd->oom_bfqq) && - bfq_may_be_close_cooperator(bfqq, new_bfqq)) - bfq_log_bfqq(bfqd, bfqq, - "would have merged with bfq%d, but wr", - new_bfqq->pid); - - if (new_bfqq && !wr_from_too_long(new_bfqq) && - likely(new_bfqq != &bfqd->oom_bfqq) && + if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && bfq_may_be_close_cooperator(bfqq, new_bfqq)) return bfq_setup_merge(bfqq, new_bfqq); diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index cd00a41ca35d..d8a358e5e284 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -2019,20 +2019,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, return true; } -/* - * If this function returns true, then bfqq cannot be merged. The idea - * is that true cooperation happens very early after processes start - * to do I/O. Usually, late cooperations are just accidental false - * positives. In case bfqq is weight-raised, such false positives - * would evidently degrade latency guarantees for bfqq. - */ -static bool wr_from_too_long(struct bfq_queue *bfqq) -{ - return bfqq->wr_coeff > 1 && - time_is_before_jiffies(bfqq->last_wr_start_finish + - msecs_to_jiffies(100)); -} - /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return @@ -2046,11 +2032,6 @@ static bool wr_from_too_long(struct bfq_queue *bfqq) * to maintain. 
Besides, in such a critical condition as an out of memory, * the benefits of queue merging may be little relevant, or even negligible. * - * Weight-raised queues can be merged only if their weight-raising - * period has just started. In fact cooperating processes are usually - * started together. Thus, with this filter we avoid false positives - * that would jeopardize low-latency guarantees. - * * WARNING: queue merging may impair fairness among non-weight raised * queues, for at least two reasons: 1) the original weight of a * merged queue may change during the merged state, 2) even being the @@ -2084,15 +2065,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->new_bfqq) return bfqq->new_bfqq; - if (io_struct && wr_from_too_long(bfqq) && - likely(bfqq != &bfqd->oom_bfqq)) - bfq_log_bfqq(bfqd, bfqq, - "would have looked for coop, but bfq%d wr", - bfqq->pid); - - if (!io_struct || - wr_from_too_long(bfqq) || - unlikely(bfqq == &bfqd->oom_bfqq)) + if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; /* If there is only one backlogged queue, don't search. */ @@ -2102,17 +2075,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, in_service_bfqq = bfqd->in_service_queue; if (in_service_bfqq && in_service_bfqq != bfqq && - bfqd->in_service_bic && wr_from_too_long(in_service_bfqq) - && likely(in_service_bfqq == &bfqd->oom_bfqq)) - bfq_log_bfqq(bfqd, bfqq, - "would have tried merge with in-service-queue, but wr"); - - if (!in_service_bfqq || in_service_bfqq == bfqq || - !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) || - unlikely(in_service_bfqq == &bfqd->oom_bfqq)) - goto check_scheduled; - - if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + likely(in_service_bfqq != &bfqd->oom_bfqq) && + bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && bfqq->entity.parent == in_service_bfqq->entity.parent && bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) { new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); @@ -2124,21 +2088,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, * queues. The only thing we need is that the bio/request is not * NULL, as we need it to establish whether a cooperator exists. */ -check_scheduled: new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, bfq_io_struct_pos(io_struct, request)); BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); - if (new_bfqq && wr_from_too_long(new_bfqq) && - likely(new_bfqq != &bfqd->oom_bfqq) && - bfq_may_be_close_cooperator(bfqq, new_bfqq)) - bfq_log_bfqq(bfqd, bfqq, - "would have merged with bfq%d, but wr", - new_bfqq->pid); - - if (new_bfqq && !wr_from_too_long(new_bfqq) && - likely(new_bfqq != &bfqd->oom_bfqq) && + if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && bfq_may_be_close_cooperator(bfqq, new_bfqq)) return bfq_setup_merge(bfqq, new_bfqq); From b82eb91d87f172aba7eb5eb98e8d5e2a621adf51 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 30 Nov 2017 17:48:28 +0100 Subject: [PATCH 08/23] block, bfq-sq, bfq-mq: increase threshold to deem I/O as random If two processes do I/O close to each other, i.e., are cooperating processes in BFQ (and CFQ'S) nomenclature, then BFQ merges their associated bfq_queues, so as to get sequential I/O from the union of the I/O requests of the processes, and thus reach a higher throughput. A merged queue is then split if its I/O stops being sequential. 
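As an aside, the sequentiality test referred to here, and detailed right below, can be pictured with the following stand-alone sketch. It is only an illustration: __builtin_popcount stands in for the kernel's hweight32() used by the BFQQ_SEEKY macro, and the threshold is passed as a parameter instead of being hard-coded.

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the seekiness check: seek_history is a 32-bit sliding
 * window with one bit per recently inserted request; a set bit marks
 * a request classified as random. The queue counts as seeky, and a
 * merged queue gets split, when the number of random requests among
 * the last 32 exceeds the threshold (4 before this patch, 19 after).
 */
bool bfqq_is_seeky(uint32_t seek_history, unsigned int threshold)
{
	return (unsigned int)__builtin_popcount(seek_history) > threshold;
}
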
In this respect, BFQ deems the I/O of a bfq_queue as (mostly) sequential only if less than 4 I/O requests are random, out of the last 32 requests inserted into the queue. Unfortunately, extensive testing (with the interleaved_io benchmark of the S suite [1], and with real applications spawning cooperating processes) has clearly shown that, with such a low threshold, only a rather low I/O throughput may be reached when several cooperating processes do I/O. In particular, the outcome of each test run was bimodal: if queue merging occurred and was stable during the test, then the throughput was close to the peak rate of the storage device, otherwise the throughput was arbitrarily low (usually around 1/10 of the peak rate with a rotational device). The probability to get the unlucky outcomes grew with the number of cooperating processes: it was already significant with 5 processes, and close to one with 7 or more processes. The cause of the low throughput in the unlucky runs was that the merged queues containing the I/O of these cooperating processes were soon split, because they contained more random I/O requests than those tolerated by the 4/32 threshold, but - that I/O would have however allowed the storage device to reach peak throughput or almost peak throughput; - in contrast, the I/O of these processes, if served individually (from separate queues) yielded a rather low throughput. So we repeated our tests with increasing values of the threshold, until we found the minimum value (19) for which we obtained maximum throughput, reliably, with at least up to 9 cooperating processes. Then we checked that the use of that higher threshold value did not cause any regression for any other benchmark in the suite [1]. This commit raises the threshold to such a higher value. [1] https://github.com/Algodev-github/S Signed-off-by: Angelo Ruocco Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 2 +- block/bfq-sq-iosched.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index f5db8613a70f..cb5f49ddecb6 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -145,7 +145,7 @@ static struct kmem_cache *bfq_pool; #define BFQQ_SEEK_THR (sector_t)(8 * 100) #define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) -#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) +#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index d8a358e5e284..e1c6dc651be1 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -139,7 +139,7 @@ static struct kmem_cache *bfq_pool; #define BFQQ_SEEK_THR (sector_t)(8 * 100) #define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) -#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) +#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 From b739dda4e4b3a1cbbc905f86f9fbb0860b068ce7 Mon Sep 17 00:00:00 2001 From: Chiara Bruschi Date: Mon, 11 Dec 2017 18:55:26 +0100 Subject: [PATCH 09/23] block, bfq-sq, bfq-mq: specify usage condition of delta_us in bfq_log_bfqq call Inside the function bfq_completed_request the value of a variable called delta_us is computed as current request completion time. 
delta_us is used inside a call to the function bfq_log_bfqq as divisor in a division operation to compute a rate value, but no check makes sure that delta_us has a non-zero value. A divisor with value 0 leads to a division error that could result in a kernel oops (and therefore an unstable/unreliable system state) and consequently cause a kernel panic if resources are unavailable after the system fault. This commit fixes this call to bfq_log_bfqq by specifying the condition that allows delta_us to be safely used as divisor. Signed-off-by: Paolo Valente Signed-off-by: Chiara Bruschi --- block/bfq-mq-iosched.c | 5 ++++- block/bfq-sq-iosched.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index cb5f49ddecb6..6ce2c0789046 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -4904,9 +4904,12 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_log_bfqq(bfqd, bfqq, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, + delta_us > 0 ? (USEC_PER_SEC* (u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us)) - >>BFQ_RATE_SHIFT, + >>BFQ_RATE_SHIFT : + (USEC_PER_SEC* + (u64)(bfqd->last_rq_max_size<<BFQ_RATE_SHIFT))>>BFQ_RATE_SHIFT, (USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT); /* diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index e1c6dc651be1..eff4c4edf5a0 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -4565,9 +4565,12 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) bfq_log(bfqd, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, + delta_us > 0 ? (USEC_PER_SEC* (u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us)) - >>BFQ_RATE_SHIFT, + >>BFQ_RATE_SHIFT : + (USEC_PER_SEC* + (u64)(bfqd->last_rq_max_size<<BFQ_RATE_SHIFT))>>BFQ_RATE_SHIFT, (USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT); /* From ae4310c13eca762644734d53074d8456c85e2dec Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 19 Dec 2017 12:07:12 +0100 Subject: [PATCH 10/23] block, bfq-mq: limit tags for writes and async I/O Asynchronous I/O can easily starve synchronous I/O (both sync reads and sync writes), by consuming all request tags. Similarly, storms of synchronous writes, such as those that sync(2) may trigger, can starve synchronous reads. In turn, these two problems may also cause BFQ to lose control of latency for interactive and soft real-time applications. For example, on a PLEXTOR PX-256M5S SSD, LibreOffice Writer takes 0.6 seconds to start if the device is idle, but it takes more than 45 seconds (!) if there are sequential writes in the background. This commit addresses this issue by limiting the maximum percentage of tags that asynchronous I/O requests and synchronous write requests can consume. In particular, this commit grants a higher threshold to synchronous writes, to prevent the latter from being starved by asynchronous I/O. According to the above test, LibreOffice Writer now starts in about 1.2 seconds on average, regardless of the background workload, and apart from some rare outliers. To check this improvement, run, e.g., sudo ./comm_startup_lat.sh bfq-mq 5 5 seq 10 "lowriter --terminate_after_init" for the comm_startup_lat benchmark in the S suite [1].
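To make the tag limits described above concrete, the following stand-alone sketch reproduces the depth arithmetic that the diff below introduces. It is illustrative only: max_u() stands in for the kernel's max(), and 6 is chosen as an example sbitmap shift (i.e., 64 tags per word).

#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned int sb_shift = 6;	/* example only: 1 << 6 = 64 tags per word */
	unsigned int depths[2][2];

	/* no queue weight-raised: up to 50% async, 75% sync writes */
	depths[0][0] = max_u((1U << sb_shift) >> 1, 1U);
	depths[0][1] = max_u(((1U << sb_shift) * 3) >> 2, 1U);
	/* some queue weight-raised: up to ~18% async, ~37% sync writes */
	depths[1][0] = max_u(((1U << sb_shift) * 3) >> 4, 1U);
	depths[1][1] = max_u(((1U << sb_shift) * 6) >> 4, 1U);

	printf("no wr:   async %u, sync writes %u (of 64 tags)\n",
	       depths[0][0], depths[0][1]);	/* prints 32 and 48 */
	printf("with wr: async %u, sync writes %u (of 64 tags)\n",
	       depths[1][0], depths[1][1]);	/* prints 12 and 24 */
	return 0;
}

With 64 tags per word this yields 32 and 48 tags without weight-raising, and 12 and 24 with it, which is where the 50%/75% and ~18%/~37% figures in the comments below come from.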
[1] https://github.com/Algodev-github/S

Signed-off-by: Paolo Valente
---
 block/bfq-mq-iosched.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/bfq-mq.h | 12 ++++++++
 2 files changed, 89 insertions(+)

diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c
index 6ce2c0789046..f384f5566672 100644
--- a/block/bfq-mq-iosched.c
+++ b/block/bfq-mq-iosched.c
@@ -362,6 +362,82 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
 	}
 }
 
+/*
+ * See the comments on bfq_limit_depth for the purpose of
+ * the depths set in the function.
+ */
+static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
+{
+	bfqd->sb_shift = bt->sb.shift;
+
+	/*
+	 * In-word depths if no bfq_queue is being weight-raised:
+	 * leaving 25% of tags only for sync reads.
+	 *
+	 * In next formulas, right-shift the value
+	 * (1U<<bfqd->sb_shift), instead of computing directly
+	 * (1U<<(bfqd->sb_shift - something)), to be robust against
+	 * any possible value of bfqd->sb_shift, without having to
+	 * limit 'something'.
+	 */
+	/* no more than 50% of tags for async I/O */
+	bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U);
+	/*
+	 * no more than 75% of tags for sync writes (25% extra tags
+	 * w.r.t. async I/O, to prevent async I/O from starving sync
+	 * writes)
+	 */
+	bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U);
+
+	/*
+	 * In-word depths in case some bfq_queue is being weight-
+	 * raised: leaving ~63% of tags for sync reads. This is the
+	 * highest percentage for which, in our tests, application
+	 * start-up times didn't suffer from any regression due to tag
+	 * shortage.
+	 */
+	/* no more than ~18% of tags for async I/O */
+	bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U);
+	/* no more than ~37% of tags for sync writes (~20% extra tags) */
+	bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U);
+}
+
+/*
+ * Async I/O can easily starve sync I/O (both sync reads and sync
+ * writes), by consuming all tags. Similarly, storms of sync writes,
+ * such as those that sync(2) may trigger, can starve sync reads.
+ * Limit depths of async I/O and sync writes so as to counter both
+ * problems.
+ */ +static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +{ + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct bfq_data *bfqd = data->q->elevator->elevator_data; + struct sbitmap_queue *bt; + + if (op_is_sync(op) && !op_is_write(op)) + return; + + if (data->flags & BLK_MQ_REQ_RESERVED) { + if (unlikely(!tags->nr_reserved_tags)) { + WARN_ON_ONCE(1); + return; + } + bt = &tags->breserved_tags; + } else + bt = &tags->bitmap_tags; + + if (unlikely(bfqd->sb_shift != bt->sb.shift)) + bfq_update_depths(bfqd, bt); + + data->shallow_depth = + bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; + + bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", + __func__, bfqd->wr_busy_queues, op_is_sync(op), + data->shallow_depth); +} + static struct bfq_queue * bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, sector_t sector, struct rb_node **ret_parent, @@ -5812,6 +5888,7 @@ static struct elv_fs_entry bfq_attrs[] = { static struct elevator_type iosched_bfq_mq = { .ops.mq = { + .limit_depth = bfq_limit_depth, .prepare_request = bfq_prepare_request, .finish_request = bfq_finish_request, .exit_icq = bfq_exit_icq, diff --git a/block/bfq-mq.h b/block/bfq-mq.h index a5947b203ef2..458099ee0308 100644 --- a/block/bfq-mq.h +++ b/block/bfq-mq.h @@ -619,6 +619,18 @@ struct bfq_data { struct bfq_queue *bio_bfqq; /* Extra flag used only for TESTING */ bool bio_bfqq_set; + + /* + * Cached sbitmap shift, used to compute depth limits in + * bfq_update_depths. + */ + unsigned int sb_shift; + + /* + * Depth limits used in bfq_limit_depth (see comments on the + * function) + */ + unsigned int word_depths[2][2]; }; enum bfqq_state_flags { From 402e5f6b59662d290ab2b3c10b0016207a63ad21 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 21 Dec 2017 15:51:39 +0100 Subject: [PATCH 11/23] bfq-sq, bfq-mq: limit sectors served with interactive weight raising To maximise responsiveness, BFQ raises the weight, and performs device idling, for bfq_queues associated with processes deemed as interactive. In particular, weight raising has a maximum duration, equal to the time needed to start a large application. If a weight-raised process goes on doing I/O beyond this maximum duration, it loses weight-raising. This mechanism is evidently vulnerable to the following false positives: I/O-bound applications that will go on doing I/O for much longer than the duration of weight-raising. These applications have basically no benefit from being weight-raised at the beginning of their I/O. On the opposite end, while being weight-raised, these applications a) unjustly steal throughput to applications that may truly need low latency; b) make BFQ uselessly perform device idling; device idling results in loss of device throughput with most flash-based storage, and may increase latencies when used purposelessly. This commit adds a countermeasure to reduce both the above problems. To introduce this countermeasure, we provide the following extra piece of information (full details in the comments added by this commit). During the start-up of the large application used as a reference to set the duration of weight-raising, involved processes transfer at most ~110K sectors each. Accordingly, a process initially deemed as interactive has no right to be weight-raised any longer, once transferred 110K sectors or more. 
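As an aside before the weight-raising discussion resumes, the tag-depth formulas in the bfq_update_depths() hunk above can be checked with a standalone sketch. The shift value below (6, i.e. 64 tags per sbitmap word) is just an example, and max_u() replaces the kernel's max():

/* depths_demo.c -- standalone sketch of the depth formulas above.
 * 'shift' plays the role of bfqd->sb_shift, i.e. log2 of the number
 * of tags per sbitmap word.
 */
#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned int shift = 6;              /* example: 64 tags per word */
	unsigned int tags = 1U << shift;
	unsigned int depths[2][2];

	/* No queue weight-raised: 50% for async I/O, 75% for sync writes. */
	depths[0][0] = max_u((1U << shift) >> 1, 1U);        /* 32 */
	depths[0][1] = max_u(((1U << shift) * 3) >> 2, 1U);  /* 48 */

	/* Some queue weight-raised: ~18% for async I/O, ~37% for sync writes. */
	depths[1][0] = max_u(((1U << shift) * 3) >> 4, 1U);  /* 12 */
	depths[1][1] = max_u(((1U << shift) * 6) >> 4, 1U);  /* 24 */

	printf("%u tags/word -> async %u, sync writes %u (no wr)\n",
	       tags, depths[0][0], depths[0][1]);
	printf("%u tags/word -> async %u, sync writes %u (wr active)\n",
	       tags, depths[1][0], depths[1][1]);
	return 0;
}

So, with 64 tags per word, async I/O and sync writes are capped at 32 and 48 tags while no queue is weight-raised, and at 12 and 24 tags while some queue is weight-raised, which matches the 50%/75% and ~18%/~37% figures in the comments. Returning to the consideration above, that a process deemed interactive has no right to weight-raising once it has transferred about 110K sectors: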
Basing on this consideration, this commit early-ends weight-raising for a bfq_queue if the latter happens to have received an amount of service at least equal to 110K sectors (actually, a little bit more, to keep a safety margin). I/O-bound applications that reach a high throughput, such as file copy, get to this threshold much before the allowed weight-raising period finishes. Thus this early ending of weight-raising reduces the amount of time during which these applications cause the problems described above. Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 84 ++++++++++++++++++++++++++++++++++++++++++++------ block/bfq-mq.h | 5 +++ block/bfq-sched.c | 3 ++ block/bfq-sq-iosched.c | 84 ++++++++++++++++++++++++++++++++++++++++++++------ block/bfq.h | 5 +++ 5 files changed, 163 insertions(+), 18 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index f384f5566672..63fdd16dec3c 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -162,15 +162,17 @@ static struct kmem_cache *bfq_pool; * interactive applications automatically, using the following formula: * duration = (R / r) * T, where r is the peak rate of the device, and * R and T are two reference parameters. - * In particular, R is the peak rate of the reference device (see below), - * and T is a reference time: given the systems that are likely to be - * installed on the reference device according to its speed class, T is - * about the maximum time needed, under BFQ and while reading two files in - * parallel, to load typical large applications on these systems. - * In practice, the slower/faster the device at hand is, the more/less it - * takes to load applications with respect to the reference device. - * Accordingly, the longer/shorter BFQ grants weight raising to interactive - * applications. + * In particular, R is the peak rate of the reference device (see + * below), and T is a reference time: given the systems that are + * likely to be installed on the reference device according to its + * speed class, T is about the maximum time needed, under BFQ and + * while reading two files in parallel, to load typical large + * applications on these systems (see the comments on + * max_service_from_wr below, for more details on how T is obtained). + * In practice, the slower/faster the device at hand is, the more/less + * it takes to load applications with respect to the reference device. + * Accordingly, the longer/shorter BFQ grants weight raising to + * interactive applications. * * BFQ uses four different reference pairs (R, T), depending on: * . whether the device is rotational or non-rotational; @@ -207,6 +209,60 @@ static int T_slow[2]; static int T_fast[2]; static int device_speed_thresh[2]; +/* + * BFQ uses the above-detailed, time-based weight-raising mechanism to + * privilege interactive tasks. This mechanism is vulnerable to the + * following false positives: I/O-bound applications that will go on + * doing I/O for much longer than the duration of weight + * raising. These applications have basically no benefit from being + * weight-raised at the beginning of their I/O. On the opposite end, + * while being weight-raised, these applications + * a) unjustly steal throughput to applications that may actually need + * low latency; + * b) make BFQ uselessly perform device idling; device idling results + * in loss of device throughput with most flash-based storage, and may + * increase latencies when used purposelessly. 
+ * + * BFQ tries to reduce these problems, by adopting the following + * countermeasure. To introduce this countermeasure, we need first to + * finish explaining how the duration of weight-raising for + * interactive tasks is computed. + * + * For a bfq_queue deemed as interactive, the duration of weight + * raising is dynamically adjusted, as a function of the estimated + * peak rate of the device, so as to be equal to the time needed to + * execute the 'largest' interactive task we benchmarked so far. By + * largest task, we mean the task for which each involved process has + * to do more I/O than for any of the other tasks we benchmarked. This + * reference interactive task is the start-up of LibreOffice Writer, + * and in this task each process/bfq_queue needs to have at most ~110K + * sectors transferred. + * + * This last piece of information enables BFQ to reduce the actual + * duration of weight-raising for at least one class of I/O-bound + * applications: those doing sequential or quasi-sequential I/O. An + * example is file copy. In fact, once started, the main I/O-bound + * processes of these applications usually consume the above 110K + * sectors in much less time than the processes of an application that + * is starting, because these I/O-bound processes will greedily devote + * almost all their CPU cycles only to their target, + * throughput-friendly I/O operations. This is even more true if BFQ + * happens to be underestimating the device peak rate, and thus + * overestimating the duration of weight raising. But, according to + * our measurements, once transferred 110K sectors, these processes + * have no right to be weight-raised any longer. + * + * Basing on the last consideration, BFQ ends weight-raising for a + * bfq_queue if the latter happens to have received an amount of + * service at least equal to the following constant. The constant is + * set to slightly more than 110K, to have a minimum safety margin. + * + * This early ending of weight-raising reduces the amount of time + * during which interactive false positives cause the two problems + * described at the beginning of these comments. + */ +static const unsigned long max_service_from_wr = 120000; + #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) @@ -1361,6 +1417,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, if (old_wr_coeff == 1 && wr_or_deserves_wr) { /* start a weight-raising period */ if (interactive) { + bfqq->service_from_wr = 0; bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); } else { @@ -3980,6 +4037,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) "back to interactive wr"); } } + if (bfqq->wr_coeff > 1 && + bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time && + bfqq->service_from_wr > max_service_from_wr) { + /* see comments on max_service_from_wr */ + bfq_bfqq_end_wr(bfqq); + bfq_log_bfqq(bfqd, bfqq, + "[%s] too much service", + __func__); + } } /* * To improve latency (for this or other queues), immediately diff --git a/block/bfq-mq.h b/block/bfq-mq.h index 458099ee0308..9a5ce1168ff5 100644 --- a/block/bfq-mq.h +++ b/block/bfq-mq.h @@ -331,6 +331,11 @@ struct bfq_queue { * last transition from idle to backlogged. */ unsigned long service_from_backlogged; + /* + * Cumulative service received from the @bfq_queue since its + * last transition to weight-raised state. 
+ */ + unsigned long service_from_wr; /* * Value of wr start time when switching to soft rt */ diff --git a/block/bfq-sched.c b/block/bfq-sched.c index 9d261dd428e4..4e6c5232e2fb 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c @@ -942,6 +942,9 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) if (!bfqq->service_from_backlogged) bfqq->first_IO_time = jiffies; + if (bfqq->wr_coeff > 1) + bfqq->service_from_wr += served; + bfqq->service_from_backlogged += served; for_each_entity(entity) { st = bfq_entity_service_tree(entity); diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index eff4c4edf5a0..486493aafaf8 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -156,15 +156,17 @@ static struct kmem_cache *bfq_pool; * interactive applications automatically, using the following formula: * duration = (R / r) * T, where r is the peak rate of the device, and * R and T are two reference parameters. - * In particular, R is the peak rate of the reference device (see below), - * and T is a reference time: given the systems that are likely to be - * installed on the reference device according to its speed class, T is - * about the maximum time needed, under BFQ and while reading two files in - * parallel, to load typical large applications on these systems. - * In practice, the slower/faster the device at hand is, the more/less it - * takes to load applications with respect to the reference device. - * Accordingly, the longer/shorter BFQ grants weight raising to interactive - * applications. + * In particular, R is the peak rate of the reference device (see + * below), and T is a reference time: given the systems that are + * likely to be installed on the reference device according to its + * speed class, T is about the maximum time needed, under BFQ and + * while reading two files in parallel, to load typical large + * applications on these systems (see the comments on + * max_service_from_wr below, for more details on how T is obtained). + * In practice, the slower/faster the device at hand is, the more/less + * it takes to load applications with respect to the reference device. + * Accordingly, the longer/shorter BFQ grants weight raising to + * interactive applications. * * BFQ uses four different reference pairs (R, T), depending on: * . whether the device is rotational or non-rotational; @@ -201,6 +203,60 @@ static int T_slow[2]; static int T_fast[2]; static int device_speed_thresh[2]; +/* + * BFQ uses the above-detailed, time-based weight-raising mechanism to + * privilege interactive tasks. This mechanism is vulnerable to the + * following false positives: I/O-bound applications that will go on + * doing I/O for much longer than the duration of weight + * raising. These applications have basically no benefit from being + * weight-raised at the beginning of their I/O. On the opposite end, + * while being weight-raised, these applications + * a) unjustly steal throughput to applications that may actually need + * low latency; + * b) make BFQ uselessly perform device idling; device idling results + * in loss of device throughput with most flash-based storage, and may + * increase latencies when used purposelessly. + * + * BFQ tries to reduce these problems, by adopting the following + * countermeasure. To introduce this countermeasure, we need first to + * finish explaining how the duration of weight-raising for + * interactive tasks is computed. 
+ * + * For a bfq_queue deemed as interactive, the duration of weight + * raising is dynamically adjusted, as a function of the estimated + * peak rate of the device, so as to be equal to the time needed to + * execute the 'largest' interactive task we benchmarked so far. By + * largest task, we mean the task for which each involved process has + * to do more I/O than for any of the other tasks we benchmarked. This + * reference interactive task is the start-up of LibreOffice Writer, + * and in this task each process/bfq_queue needs to have at most ~110K + * sectors transfered. + * + * This last piece of information enables BFQ to reduce the actual + * duration of weight-raising for at least one class of I/O-bound + * applications: those doing sequential or quasi-sequential I/O. An + * example is file copy. In fact, once started, the main I/O-bound + * processes of these applications usually consume the above 110K + * sectors in much less time than the processes of an application that + * is starting, because these I/O-bound processes will greedily devote + * almost all their CPU cycles only to their target, + * throughput-friendly I/O operations. This is even more true if BFQ + * happens to be underestimating the device peak rate, and thus + * overestimating the duration of weight raising. But, according to + * our measurements, once transferred 110K sectors, these processes + * have no right to be weight-raised any longer. + * + * Basing on the last consideration, BFQ ends weight-raising for a + * bfq_queue if the latter happens to have received an amount of + * service at least equal to the following constant. The constant is + * set to slightly more than 110K, to have a minimum safety margin. + * + * This early ending of weight-raising reduces the amount of time + * during which interactive false positives cause the two problems + * described at the beginning of these comments. + */ +static const unsigned long max_service_from_wr = 120000; + #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) @@ -1246,6 +1302,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, if (old_wr_coeff == 1 && wr_or_deserves_wr) { /* start a weight-raising period */ if (interactive) { + bfqq->service_from_wr = 0; bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); } else { @@ -3794,6 +3851,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) "back to interactive wr"); } } + if (bfqq->wr_coeff > 1 && + bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time && + bfqq->service_from_wr > max_service_from_wr) { + /* see comments on max_service_from_wr */ + bfq_bfqq_end_wr(bfqq); + bfq_log_bfqq(bfqd, bfqq, + "[%s] too much service", + __func__); + } } /* * To improve latency (for this or other queues), immediately diff --git a/block/bfq.h b/block/bfq.h index 59539adc00a5..0cd7a3f251a7 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -323,6 +323,11 @@ struct bfq_queue { * last transition from idle to backlogged. */ unsigned long service_from_backlogged; + /* + * Cumulative service received from the @bfq_queue since its + * last transition to weight-raised state. 
+ */ + unsigned long service_from_wr; /* * Value of wr start time when switching to soft rt */ From 59efebb94b2f9bac653faf62dadb45b83bd27fa7 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 4 Jan 2018 16:29:58 +0100 Subject: [PATCH 12/23] bfq-sq, bfq-mq: put async queues for root bfq groups too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For each pair [device for which bfq is selected as I/O scheduler, group in blkio/io], bfq maintains a corresponding bfq group. Each such bfq group contains a set of async queues, with each async queue created on demand, i.e., when some I/O request arrives for it. On creation, an async queue gets an extra reference, to make sure that the queue is not freed as long as its bfq group exists. Accordingly, to allow the queue to be freed after the group exited, this extra reference must released on group exit. The above holds also for a bfq root group, i.e., for the bfq group corresponding to the root blkio/io root for a given device. Yet, by mistake, the references to the existing async queues of a root group are not released when the latter exits. This causes a memory leak when the instance of bfq for a given device exits. In a similar vein, bfqg_stats_xfer_dead is not executed for a root group. This commit fixes bfq_pd_offline so that the latter executes the above missing operations for a root group too. Reported-by: Holger Hoffstätte Reported-by: Guoqing Jiang Signed-off-by: Davide Ferrari Signed-off-by: Paolo Valente --- block/bfq-cgroup-included.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c index 562b0ce581a7..45fefb2e2d57 100644 --- a/block/bfq-cgroup-included.c +++ b/block/bfq-cgroup-included.c @@ -885,13 +885,13 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) entity = bfqg->my_entity; - if (!entity) /* root group */ - return; - #ifdef BFQ_MQ spin_lock_irqsave(&bfqd->lock, flags); #endif + if (!entity) /* root group */ + goto put_async_queues; + /* * Empty all service_trees belonging to this group before * deactivating the group itself. @@ -926,6 +926,8 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) BUG_ON(bfqg->sched_data.in_service_entity); __bfq_deactivate_entity(entity, false); + +put_async_queues: bfq_put_async_queues(bfqd, bfqg); #ifdef BFQ_MQ From 2dfbaaaf95054e2da3ededc0deb1ba5a4f589e53 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 8 Jan 2018 19:38:45 +0100 Subject: [PATCH 13/23] bfq-sq, bfq-mq: release oom-queue ref to root group on exit On scheduler init, a reference to the root group, and a reference to its corresponding blkg are taken for the oom queue. Yet these references are not released on scheduler exit, which prevents these objects from be freed. This commit adds the missing reference releases. 
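As a side note, the reference-counting pattern behind the two fixes above can be illustrated with a schematic sketch; the structure and function names below are invented and only mirror the get/put discipline, not the real bfq code:

/* refleak_demo.c -- schematic illustration of the bug pattern fixed above. */
#include <stdio.h>
#include <stdlib.h>

struct group {
	int refs;          /* base reference plus extra per-async-queue reference */
	int is_root;
};

static void group_put(struct group *g)
{
	if (--g->refs == 0) {
		printf("group %p freed\n", (void *)g);
		free(g);
	}
}

/* Called when the group goes away. */
static void group_offline(struct group *g, int buggy)
{
	if (buggy && g->is_root)
		return;        /* old behavior: root group skipped the put below */

	group_put(g);          /* release the async-queue reference */
}

int main(void)
{
	struct group *root = calloc(1, sizeof(*root));

	root->is_root = 1;
	root->refs = 2;        /* one base ref + one ref held for an async queue */

	group_offline(root, 1);    /* buggy path: async-queue ref never dropped */
	printf("after buggy offline: refs = %d (leaked)\n", root->refs);

	group_offline(root, 0);    /* fixed path: drop the async-queue ref */
	group_put(root);           /* drop the base reference -> freed */
	return 0;
}

The point is simply that every extra reference taken on creation (for the async queues, or for the oom queue) needs a matching put on the exit path, including the paths that handle the root group.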
Reported-by: Davide Ferrari Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 3 +++ block/bfq-sq-iosched.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 63fdd16dec3c..b82c52fabf91 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -5507,6 +5507,9 @@ static void bfq_exit_queue(struct elevator_queue *e) BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + /* release oom-queue reference to root group */ + bfqg_and_blkg_put(bfqd->root_group); + #ifdef BFQ_GROUP_IOSCHED_ENABLED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index 486493aafaf8..851af055664d 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -5052,6 +5052,9 @@ static void bfq_exit_queue(struct elevator_queue *e) BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + /* release oom-queue reference to root group */ + bfqg_put(bfqd->root_group); + #ifdef BFQ_GROUP_IOSCHED_ENABLED blkcg_deactivate_policy(q, &blkcg_policy_bfq); #else From 13efe00c8292d78d223e1090a7f36426e360eb38 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 8 Jan 2018 19:40:38 +0100 Subject: [PATCH 14/23] block, bfq-sq, bfq-mq: trace get and put of bfq groups Signed-off-by: Paolo Valente --- block/bfq-cgroup-included.c | 15 +++++++++++++++ block/bfq-mq-iosched.c | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c index 45fefb2e2d57..f94743fb2e7d 100644 --- a/block/bfq-cgroup-included.c +++ b/block/bfq-cgroup-included.c @@ -267,6 +267,8 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) static void bfqg_get(struct bfq_group *bfqg) { + trace_printk("bfqg %p\n", bfqg); + #ifdef BFQ_MQ bfqg->ref++; #else @@ -280,6 +282,9 @@ static void bfqg_put(struct bfq_group *bfqg) bfqg->ref--; BUG_ON(bfqg->ref < 0); + trace_printk("putting bfqg %p %s\n", bfqg, + bfqg->ref == 0 ? "and freeing it" : ""); + if (bfqg->ref == 0) kfree(bfqg); #else @@ -293,6 +298,7 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); + trace_printk("getting blkg for bfqg %p\n", bfqg); blkg_get(bfqg_to_blkg(bfqg)); } @@ -300,6 +306,7 @@ static void bfqg_and_blkg_put(struct bfq_group *bfqg) { bfqg_put(bfqg); + trace_printk("putting blkg for bfqg %p\n", bfqg); blkg_put(bfqg_to_blkg(bfqg)); } #endif @@ -382,6 +389,8 @@ static void bfq_init_entity(struct bfq_entity *entity, * Make sure that bfqg and its associated blkg do not * disappear before entity. 
*/ + bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting bfqg %p and blkg\n", __func__, bfqg); + bfqg_and_blkg_get(bfqg); #else bfqg_get(bfqg); @@ -475,6 +484,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) kfree(bfqg); return NULL; } + trace_printk("bfqg %p\n", bfqg); #ifdef BFQ_MQ /* see comments in bfq_bic_update_cgroup for why refcounting */ @@ -513,6 +523,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) static void bfq_pd_free(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); + trace_printk("bfqg %p\n", bfqg); bfqg_stats_exit(&bfqg->stats); #ifdef BFQ_MQ @@ -650,6 +661,8 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); } #ifdef BFQ_MQ + bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); + bfqg_and_blkg_put(bfqq_group(bfqq)); #else bfqg_put(bfqq_group(bfqq)); @@ -658,6 +671,8 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; #ifdef BFQ_MQ + bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting blkg and bfqg %p\n", __func__, bfqg); + /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); #else diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index b82c52fabf91..d5b7a6b985d7 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -4385,10 +4385,11 @@ static void bfq_put_queue(struct bfq_queue *bfqq) if (bfqq->bfqd) bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); - kmem_cache_free(bfq_pool, bfqq); #ifdef BFQ_GROUP_IOSCHED_ENABLED + bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); bfqg_and_blkg_put(bfqg); #endif + kmem_cache_free(bfq_pool, bfqq); } static void bfq_put_cooperator(struct bfq_queue *bfqq) From 816b77fba966171974eb5ee25d81bc4e19eaf1b4 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Jan 2018 09:08:22 +0100 Subject: [PATCH 15/23] bfq-sq, bfq-mq: compile group put for oom queue only if BFQ_GROUP_IOSCHED is set Commit ("bfq-sq, bfq-mq: release oom-queue ref to root group on exit") added a missing put of the root bfq group for the oom queue. That put has to be, and can be, performed only if CONFIG_BFQ_GROUP_IOSCHED is defined: the function doing the put is even not defined at all if CONFIG_BFQ_GROUP_IOSCHED is not defined. But that commit makes that put be invoked regardless of whether CONFIG_BFQ_GROUP_IOSCHED is defined. This commit fixes this mistake, by making that invocation be compiled only if CONFIG_BFQ_GROUP_IOSCHED is actually defined. 
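As a side note, the constraint behind this fix can be reproduced with a trivial example; FEATURE below is a stand-in for CONFIG_BFQ_GROUP_IOSCHED, and the function names are invented:

/* ifdef_demo.c -- why the call must sit inside the same #ifdef as the
 * function it calls.
 */
#include <stdio.h>

#ifdef FEATURE
static void put_root_group(void)
{
	printf("root group reference released\n");
}
#endif

static void exit_queue(void)
{
#ifdef FEATURE
	/* Guarded call: compiles whether or not FEATURE is defined. */
	put_root_group();
#endif
	/* An unguarded put_root_group() here would fail to build
	 * (implicit declaration / undefined reference) when FEATURE
	 * is not defined, which is the mistake fixed above. */
	printf("queue torn down\n");
}

int main(void)
{
	exit_queue();
	return 0;
}

Moving the call inside the #ifdef, as the diff above does, keeps the file buildable in both configurations.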
Fixes ("block, bfq: release oom-queue ref to root group on exit") Reported-by: Jan Alexander Steffens Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 2 +- block/bfq-sq-iosched.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index d5b7a6b985d7..2581fe0f6f2f 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -5508,10 +5508,10 @@ static void bfq_exit_queue(struct elevator_queue *e) BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); +#ifdef BFQ_GROUP_IOSCHED_ENABLED /* release oom-queue reference to root group */ bfqg_and_blkg_put(bfqd->root_group); -#ifdef BFQ_GROUP_IOSCHED_ENABLED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else spin_lock_irq(&bfqd->lock); diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index 851af055664d..c4df156b1fb4 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -5052,10 +5052,10 @@ static void bfq_exit_queue(struct elevator_queue *e) BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); +#ifdef BFQ_GROUP_IOSCHED_ENABLED /* release oom-queue reference to root group */ bfqg_put(bfqd->root_group); -#ifdef BFQ_GROUP_IOSCHED_ENABLED blkcg_deactivate_policy(q, &blkcg_policy_bfq); #else bfq_put_async_queues(bfqd, bfqd->root_group); From 643a89c659172b2c9ae16adfe03af4e3e88e1326 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 13 Jan 2018 18:48:41 +0100 Subject: [PATCH 16/23] block, bfq-sq, bfq-mq: remove trace_printks Commit ("block, bfq-sq, bfq-mq: trace get and put of bfq groups") unwisely added some invocations of the function trace_printk, which is inappropriate in production kernels. This commit removes those invocations. Signed-off-by: Paolo Valente --- block/bfq-cgroup-included.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c index f94743fb2e7d..a4f8a03edfc9 100644 --- a/block/bfq-cgroup-included.c +++ b/block/bfq-cgroup-included.c @@ -267,8 +267,6 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) static void bfqg_get(struct bfq_group *bfqg) { - trace_printk("bfqg %p\n", bfqg); - #ifdef BFQ_MQ bfqg->ref++; #else @@ -282,9 +280,6 @@ static void bfqg_put(struct bfq_group *bfqg) bfqg->ref--; BUG_ON(bfqg->ref < 0); - trace_printk("putting bfqg %p %s\n", bfqg, - bfqg->ref == 0 ? 
"and freeing it" : ""); - if (bfqg->ref == 0) kfree(bfqg); #else @@ -298,7 +293,6 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); - trace_printk("getting blkg for bfqg %p\n", bfqg); blkg_get(bfqg_to_blkg(bfqg)); } @@ -306,7 +300,6 @@ static void bfqg_and_blkg_put(struct bfq_group *bfqg) { bfqg_put(bfqg); - trace_printk("putting blkg for bfqg %p\n", bfqg); blkg_put(bfqg_to_blkg(bfqg)); } #endif @@ -484,8 +477,6 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) kfree(bfqg); return NULL; } - trace_printk("bfqg %p\n", bfqg); - #ifdef BFQ_MQ /* see comments in bfq_bic_update_cgroup for why refcounting */ bfqg_get(bfqg); @@ -523,7 +514,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd) static void bfq_pd_free(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); - trace_printk("bfqg %p\n", bfqg); bfqg_stats_exit(&bfqg->stats); #ifdef BFQ_MQ From ce050275e24fecec800f346c09d9494563e9fc8a Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 15 Jan 2018 15:07:05 +0100 Subject: [PATCH 17/23] block, bfq-mq: add requeue-request hook Commit 'a6a252e64914 ("blk-mq-sched: decide how to handle flush rq via RQF_FLUSH_SEQ")' makes all non-flush re-prepared requests for a device be re-inserted into the active I/O scheduler for that device. As a consequence, I/O schedulers may get the same request inserted again, even several times, without a finish_request invoked on that request before each re-insertion. This fact is the cause of the failure reported in [1]. For an I/O scheduler, every re-insertion of the same re-prepared request is equivalent to the insertion of a new request. For schedulers like mq-deadline or kyber, this fact causes no harm. In contrast, it confuses a stateful scheduler like BFQ, which keeps state for an I/O request, until the finish_request hook is invoked on the request. In particular, BFQ may get stuck, waiting forever for the number of request dispatches, of the same request, to be balanced by an equal number of request completions (while there will be one completion for that request). In this state, BFQ may refuse to serve I/O requests from other bfq_queues. The hang reported in [1] then follows. However, the above re-prepared requests undergo a requeue, thus the requeue_request hook of the active elevator is invoked for these requests, if set. This commit then addresses the above issue by properly implementing the hook requeue_request in BFQ. 
[1] https://marc.info/?l=linux-block&m=151211117608676 Reported-by: Ivan Kozik Reported-by: Alban Browaeys Signed-off-by: Paolo Valente Signed-off-by: Serena Ziviani --- block/bfq-mq-iosched.c | 90 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 17 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 2581fe0f6f2f..bb7ccc2f1165 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -4162,9 +4162,9 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * TESTING: reset DISP_LIST flag, because: 1) * this rq this request has passed through * bfq_prepare_request, 2) then it will have - * bfq_finish_request invoked on it, and 3) in - * bfq_finish_request we use this flag to check - * that bfq_finish_request is not invoked on + * bfq_finish_requeue_request invoked on it, and 3) in + * bfq_finish_requeue_request we use this flag to check + * that bfq_finish_requeue_request is not invoked on * requests for which bfq_prepare_request has * been invoked. */ @@ -4173,8 +4173,8 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) } /* - * We exploit the bfq_finish_request hook to decrement - * rq_in_driver, but bfq_finish_request will not be + * We exploit the bfq_finish_requeue_request hook to decrement + * rq_in_driver, but bfq_finish_requeue_request will not be * invoked on this request. So, to avoid unbalance, * just start this request, without incrementing * rq_in_driver. As a negative consequence, @@ -4183,10 +4183,10 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * bfq_schedule_dispatch to be invoked uselessly. * * As for implementing an exact solution, the - * bfq_finish_request hook, if defined, is probably + * bfq_finish_requeue_request hook, if defined, is probably * invoked also on this request. So, by exploiting * this hook, we could 1) increment rq_in_driver here, - * and 2) decrement it in bfq_finish_request. Such a + * and 2) decrement it in bfq_finish_requeue_request. Such a * solution would let the value of the counter be * always accurate, but it would entail using an extra * interface function. This cost seems higher than the @@ -4878,6 +4878,8 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) return idle_timer_disabled; } +static void bfq_prepare_request(struct request *rq, struct bio *bio); + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { @@ -4919,6 +4921,20 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, BUG_ON(!(rq->rq_flags & RQF_GOT)); rq->rq_flags &= ~RQF_GOT; + if (!bfqq) { + /* + * This should never happen. Most likely rq is + * a requeued regular request, being + * re-inserted without being first + * re-prepared. Do a prepare, to avoid + * failure. 
+ */ + pr_warn("Regular request associated with no queue"); + WARN_ON(1); + bfq_prepare_request(rq, rq->bio); + bfqq = RQ_BFQQ(rq); + } + #if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* @@ -5110,7 +5126,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) } } -static void bfq_finish_request_body(struct bfq_queue *bfqq) +static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request_body: allocated %d", bfqq->allocated); @@ -5120,7 +5136,13 @@ static void bfq_finish_request_body(struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void bfq_finish_request(struct request *rq) +/* + * Handle either a requeue or a finish for rq. The things to do are + * the same in both cases: all references to rq are to be dropped. In + * particular, rq is considered completed from the point of view of + * the scheduler. + */ +static void bfq_finish_requeue_request(struct request *rq) { struct bfq_queue *bfqq; struct bfq_data *bfqd; @@ -5128,11 +5150,27 @@ static void bfq_finish_request(struct request *rq) BUG_ON(!rq); - if (!rq->elv.icq) + bfqq = RQ_BFQQ(rq); + + /* + * Requeue and finish hooks are invoked in blk-mq without + * checking whether the involved request is actually still + * referenced in the scheduler. To handle this fact, the + * following two checks make this function exit in case of + * spurious invocations, for which there is nothing to do. + * + * First, check whether rq has nothing to do with an elevator. + */ + if (unlikely(!(rq->rq_flags & RQF_ELVPRIV))) return; - bfqq = RQ_BFQQ(rq); - BUG_ON(!bfqq); + /* + * rq either is not associated with any icq, or is an already + * requeued request that has not (yet) been re-inserted into + * a bfq_queue. + */ + if (!rq->elv.icq || !bfqq) + return; bic = RQ_BIC(rq); BUG_ON(!bic); @@ -5145,7 +5183,6 @@ static void bfq_finish_request(struct request *rq) BUG(); } BUG_ON(rq->rq_flags & RQF_QUEUED); - BUG_ON(!(rq->rq_flags & RQF_ELVPRIV)); bfq_log_bfqq(bfqd, bfqq, "putting rq %p with %u sects left, STARTED %d", @@ -5166,13 +5203,14 @@ static void bfq_finish_request(struct request *rq) spin_lock_irqsave(&bfqd->lock, flags); bfq_completed_request(bfqq, bfqd); - bfq_finish_request_body(bfqq); + bfq_finish_requeue_request_body(bfqq); spin_unlock_irqrestore(&bfqd->lock, flags); } else { /* * Request rq may be still/already in the scheduler, - * in which case we need to remove it. And we cannot + * in which case we need to remove it (this should + * never happen in case of requeue). And we cannot * defer such a check and removal, to avoid * inconsistencies in the time interval from the end * of this function to the start of the deferred work. @@ -5189,9 +5227,26 @@ static void bfq_finish_request(struct request *rq) bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); } - bfq_finish_request_body(bfqq); + bfq_finish_requeue_request_body(bfqq); } + /* + * Reset private fields. In case of a requeue, this allows + * this function to correctly do nothing if it is spuriously + * invoked again on this same request (see the check at the + * beginning of the function). Probably, a better general + * design would be to prevent blk-mq from invoking the requeue + * or finish hooks of an elevator, for a request that is not + * referred by that elevator. 
+ * + * Resetting the following fields would break the + * request-insertion logic if rq is re-inserted into a bfq + * internal queue, without a re-preparation. Here we assume + * that re-insertions of requeued requests, without + * re-preparation, can happen only for pass_through or at_head + * requests (which are not re-inserted into bfq internal + * queues). + */ rq->elv.priv[0] = NULL; rq->elv.priv[1] = NULL; } @@ -5960,7 +6015,8 @@ static struct elevator_type iosched_bfq_mq = { .ops.mq = { .limit_depth = bfq_limit_depth, .prepare_request = bfq_prepare_request, - .finish_request = bfq_finish_request, + .requeue_request = bfq_finish_requeue_request, + .finish_request = bfq_finish_requeue_request, .exit_icq = bfq_exit_icq, .insert_requests = bfq_insert_requests, .dispatch_request = bfq_dispatch_request, From 3e4f292191cc62b3844316b9741534c3f1b36f0a Mon Sep 17 00:00:00 2001 From: Davide Paganelli Date: Thu, 8 Feb 2018 12:19:24 +0100 Subject: [PATCH 18/23] block, bfq-mq, bfq-sq: make log functions print names of calling functions Add the macro __func__ as a parameter to the invocations of the functions pr_crit, blk_add_trace_msg and blk_add_cgroup_trace_msg in bfq_log* functions, in order to include automatically in the log messages the names of the functions that call the log functions. The programmer can then avoid doing it. Signed-off-by: Davide Paganelli Signed-off-by: Paolo Valente --- block/bfq-cgroup-included.c | 9 +-- block/bfq-mq-iosched.c | 167 ++++++++++++++++++++++---------------------- block/bfq-mq.h | 33 ++++----- block/bfq-sched.c | 54 +++++++------- block/bfq-sq-iosched.c | 134 +++++++++++++++++------------------ block/bfq.h | 33 ++++----- 6 files changed, 214 insertions(+), 216 deletions(-) diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c index a4f8a03edfc9..613f154e9da5 100644 --- a/block/bfq-cgroup-included.c +++ b/block/bfq-cgroup-included.c @@ -382,7 +382,8 @@ static void bfq_init_entity(struct bfq_entity *entity, * Make sure that bfqg and its associated blkg do not * disappear before entity. 
*/ - bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting bfqg %p and blkg\n", __func__, bfqg); + bfq_log_bfqq(bfqq->bfqd, bfqq, "getting bfqg %p and blkg\n", + bfqg); bfqg_and_blkg_get(bfqg); #else @@ -651,7 +652,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); } #ifdef BFQ_MQ - bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); + bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg); bfqg_and_blkg_put(bfqq_group(bfqq)); #else @@ -661,7 +662,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; #ifdef BFQ_MQ - bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] getting blkg and bfqg %p\n", __func__, bfqg); + bfq_log_bfqq(bfqq->bfqd, bfqq, "getting blkg and bfqg %p\n", bfqg); /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); @@ -721,7 +722,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, if (entity->sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, 0); bfq_log_bfqq(bfqd, async_bfqq, - "bic_change_group: %p %d", + "%p %d", async_bfqq, async_bfqq->ref); bfq_put_queue(async_bfqq); diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index bb7ccc2f1165..edc93b6af186 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -310,7 +310,7 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, static void bfq_schedule_dispatch(struct bfq_data *bfqd) { if (bfqd->queued != 0) { - bfq_log(bfqd, "schedule dispatch"); + bfq_log(bfqd, ""); blk_mq_run_hw_queues(bfqd->queue, true); } } @@ -489,8 +489,8 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) data->shallow_depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; - bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(op), + bfq_log(bfqd, "wr_busy %d sync %d depth %u", + bfqd->wr_busy_queues, op_is_sync(op), data->shallow_depth); } @@ -528,7 +528,7 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, if (rb_link) *rb_link = p; - bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", + bfq_log(bfqd, "%llu: returning %d", (unsigned long long) sector, bfqq ? 
bfqq->pid : 0); @@ -749,7 +749,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq, if (rq == last || ktime_get_ns() < rq->fifo_time) return NULL; - bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); + bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq); BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); return rq; } @@ -842,7 +842,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, bfq_serv_to_charge(next_rq, bfqq)); if (entity->budget != new_budget) { entity->budget = new_budget; - bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", + bfq_log_bfqq(bfqd, bfqq, "new budget %lu", new_budget); bfq_requeue_bfqq(bfqd, bfqq, false); } @@ -915,8 +915,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); bfq_log_bfqq(bfqq->bfqd, bfqq, - "[%s] bic %p wr_coeff %d start_finish %lu max_time %lu", - __func__, + "bic %p wr_coeff %d start_finish %lu max_time %lu", bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, bfqq->wr_cur_max_time); @@ -929,11 +928,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, bfq_wr_duration(bfqd))) { switch_back_to_interactive_wr(bfqq, bfqd); bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching back to interactive"); + "switching back to interactive"); } else { bfqq->wr_coeff = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching off wr (%lu + %lu < %lu)", + "switching off wr (%lu + %lu < %lu)", bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, jiffies); } @@ -985,7 +984,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* Increment burst size to take into account also bfqq */ bfqd->burst_size++; - bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size); + bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size); BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh); @@ -998,7 +997,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * other to consider this burst as large. */ bfqd->large_burst = true; - bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started"); + bfq_log_bfqq(bfqd, bfqq, "large burst started"); /* * We can now mark all queues in the burst list as @@ -1170,7 +1169,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqd->large_burst = false; bfq_reset_burst_list(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, - "handle_burst: late activation or different group"); + "late activation or different group"); goto end; } @@ -1180,7 +1179,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * bfqq as belonging to this large burst immediately. */ if (bfqd->large_burst) { - bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst"); + bfq_log_bfqq(bfqd, bfqq, "marked in burst"); bfq_mark_bfqq_in_large_burst(bfqq); goto end; } @@ -1686,7 +1685,7 @@ static void bfq_add_request(struct request *rq) unsigned int old_wr_coeff = bfqq->wr_coeff; bool interactive = false; - bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s", + bfq_log_bfqq(bfqd, bfqq, "size %u %s", blk_rq_sectors(rq), rq_is_sync(rq) ? 
"S" : "A"); if (bfqq->wr_coeff > 1) /* queue is being weight-raised */ @@ -1952,7 +1951,7 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, __rq = bfq_find_rq_fmerge(bfqd, bio, q); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; - bfq_log(bfqd, "request_merge: req %p", __rq); + bfq_log(bfqd, "req %p", __rq); return ELEVATOR_FRONT_MERGE; } @@ -1989,7 +1988,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, bfqq->next_rq = next_rq; bfq_log_bfqq(bfqd, bfqq, - "request_merged: req %p prev %p next_rq %p bfqq %p", + "req %p prev %p next_rq %p bfqq %p", req, prev, next_rq, bfqq); /* @@ -2018,7 +2017,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, goto end; bfq_log_bfqq(bfqq->bfqd, bfqq, - "requests_merged: rq %p next %p bfqq %p next_bfqq %p", + "rq %p next %p bfqq %p next_bfqq %p", rq, next, bfqq, next_bfqq); spin_lock_irq(&bfqq->bfqd->lock); @@ -2069,10 +2068,10 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) */ bfqq->entity.prio_changed = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "end_wr: wrais ending at %lu, rais_max_time %u", + "wrais ending at %lu, rais_max_time %u", bfqq->last_wr_start_finish, jiffies_to_msecs(bfqq->wr_cur_max_time)); - bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d", + bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d", bfqq->bfqd->wr_busy_queues); } @@ -2245,8 +2244,8 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, { if (bfq_too_late_for_merging(new_bfqq)) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "[%s] too late for bfq%d to be merged", - __func__, new_bfqq->pid); + "too late for bfq%d to be merged", + new_bfqq->pid); return false; } @@ -2395,8 +2394,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); bfq_log_bfqq(bfqq->bfqd, bfqq, - "[%s] bic %p wr_coeff %d start_finish %lu max_time %lu", - __func__, + "bic %p wr_coeff %d start_finish %lu max_time %lu", bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, bfqq->wr_cur_max_time); } @@ -2453,7 +2451,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, } - bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", + bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d", bfqd->wr_busy_queues); /* @@ -2554,7 +2552,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd, bfqq->budget_timeout = jiffies + bfqd->bfq_timeout * timeout_coeff; - bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", + bfq_log_bfqq(bfqd, bfqq, "%u", jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); } @@ -2620,10 +2618,10 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, bfq_set_budget_timeout(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, - "set_in_service_queue, cur-budget = %d", + "cur-budget = %d", bfqq->entity.budget); } else - bfq_log(bfqd, "set_in_service_queue: NULL"); + bfq_log(bfqd, "NULL"); bfqd->in_service_queue = bfqq; } @@ -2746,7 +2744,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq bfqd->peak_rate_samples = 0; /* full re-init on next disp. 
*/ bfq_log(bfqd, - "reset_rate_computation at end, sample %u/%u tot_sects %llu", + "at end, sample %u/%u tot_sects %llu", bfqd->peak_rate_samples, bfqd->sequential_samples, bfqd->tot_sectors_dispatched); } @@ -2766,7 +2764,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { bfq_log(bfqd, - "update_rate_reset: only resetting, delta_first %lluus samples %d", + "only resetting, delta_first %lluus samples %d", bfqd->delta_from_first>>10, bfqd->peak_rate_samples); goto reset_computation; } @@ -2790,7 +2788,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) div_u64(bfqd->delta_from_first, NSEC_PER_USEC)); bfq_log(bfqd, -"update_rate_reset: tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", +"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), rate > 20<peak_rate) || rate > 20<peak_rate_samples, bfqd->sequential_samples, ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); goto reset_computation; } else { bfq_log(bfqd, - "update_rate_reset: do update, samples %u/%u rate/peak %llu/%llu", + "do update, samples %u/%u rate/peak %llu/%llu", bfqd->peak_rate_samples, bfqd->sequential_samples, ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); @@ -2868,7 +2866,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) rate /= divisor; /* smoothing constant alpha = 1/divisor */ bfq_log(bfqd, - "update_rate_reset: divisor %d tmp_peak_rate %llu tmp_rate %u", + "divisor %d tmp_peak_rate %llu tmp_rate %u", divisor, ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); @@ -2922,7 +2920,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) if (bfqd->peak_rate_samples == 0) { /* first dispatch */ bfq_log(bfqd, - "update_peak_rate: goto reset, samples %d", + "goto reset, samples %d", bfqd->peak_rate_samples) ; bfq_reset_rate_computation(bfqd, rq); goto update_last_values; /* will add one sample */ @@ -2943,7 +2941,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && bfqd->rq_in_driver == 0) { bfq_log(bfqd, -"update_peak_rate: jumping to updating&resetting delta_last %lluus samples %d", +"jumping to updating&resetting delta_last %lluus samples %d", (now_ns - bfqd->last_dispatch)>>10, bfqd->peak_rate_samples) ; goto update_rate_and_reset; @@ -2969,7 +2967,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) bfqd->delta_from_first = now_ns - bfqd->first_dispatch; bfq_log(bfqd, - "update_peak_rate: added samples %u/%u tot_sects %llu delta_first %lluus", + "added samples %u/%u tot_sects %llu delta_first %lluus", bfqd->peak_rate_samples, bfqd->sequential_samples, bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10); @@ -2985,12 +2983,12 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) bfqd->last_dispatch = now_ns; bfq_log(bfqd, - "update_peak_rate: delta_first %lluus last_pos %llu peak_rate %llu", + "delta_first %lluus last_pos %llu peak_rate %llu", (now_ns - bfqd->first_dispatch)>>10, (unsigned long long) bfqd->last_position, ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); bfq_log(bfqd, - 
"update_peak_rate: samples at end %d", bfqd->peak_rate_samples); + "samples at end %d", bfqd->peak_rate_samples); } /* @@ -3088,11 +3086,11 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, */ budget = 2 * min_budget; - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", + bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d", bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", + bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d", budget, bfq_min_budget(bfqd)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d", bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { @@ -3294,7 +3292,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, else /* charge at least one seek */ *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; - bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs); + bfq_log(bfqd, "too short %u", delta_usecs); return slow; } @@ -3317,11 +3315,11 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, * peak rate. */ slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; - bfq_log(bfqd, "bfq_bfqq_is_slow: relative rate %d/%d", + bfq_log(bfqd, "relative rate %d/%d", bfqq->entity.service, bfqd->bfq_max_budget); } - bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); + bfq_log_bfqq(bfqd, bfqq, "slow %d", slow); return slow; } @@ -3423,7 +3421,7 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) { bfq_log_bfqq(bfqd, bfqq, -"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u", +"service_blkg %lu soft_rate %u sects/sec interval %u", bfqq->service_from_backlogged, bfqd->bfq_wr_max_softrt_rate, jiffies_to_msecs(HZ * bfqq->service_from_backlogged / @@ -3602,7 +3600,7 @@ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "may_budget_timeout: wait_request %d left %d timeout %d", + "wait_request %d left %d timeout %d", bfq_bfqq_wait_request(bfqq), bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, bfq_bfqq_budget_timeout(bfqq)); @@ -3863,11 +3861,11 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * either boosts the throughput (without issues), or is * necessary to preserve service guarantees. 
*/ - bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", + bfq_log_bfqq(bfqd, bfqq, "sync %d idling_boosts_thr %d", bfq_bfqq_sync(bfqq), idling_boosts_thr); bfq_log_bfqq(bfqd, bfqq, - "may_idle: wr_busy %d boosts %d IO-bound %d guar %d", + "wr_busy %d boosts %d IO-bound %d guar %d", bfqd->wr_busy_queues, idling_boosts_thr_without_issues, bfq_bfqq_IO_bound(bfqq), @@ -3907,7 +3905,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) if (!bfqq) goto new_queue; - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + bfq_log_bfqq(bfqd, bfqq, "already in-service queue"); if (bfq_may_expire_for_budg_timeout(bfqq) && !bfq_bfqq_wait_request(bfqq) && @@ -3983,14 +3981,14 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) new_queue: bfqq = bfq_set_in_service_queue(bfqd); if (bfqq) { - bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); + bfq_log_bfqq(bfqd, bfqq, "checking new queue"); goto check_queue; } keep_queue: if (bfqq) - bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); + bfq_log_bfqq(bfqd, bfqq, "returned this queue"); else - bfq_log(bfqd, "select_queue: no queue returned"); + bfq_log(bfqd, "no queue returned"); return bfqq; } @@ -4043,8 +4041,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* see comments on max_service_from_wr */ bfq_bfqq_end_wr(bfqq); bfq_log_bfqq(bfqd, bfqq, - "[%s] too much service", - __func__); + "too much service"); } } /* @@ -4122,7 +4119,7 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; - bfq_log(bfqd, "has_work, dispatch_non_empty %d busy_queues %d", + bfq_log(bfqd, "dispatch_non_empty %d busy_queues %d", !list_empty_careful(&bfqd->dispatch), bfqd->busy_queues > 0); /* @@ -4146,7 +4143,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) rq->rq_flags &= ~RQF_DISP_LIST; bfq_log(bfqd, - "dispatch requests: picked %p from dispatch list", rq); + "picked %p from dispatch list", rq); bfqq = RQ_BFQQ(rq); if (bfqq) { @@ -4196,7 +4193,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) goto start_rq; } - bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); + bfq_log(bfqd, "%d busy queues", bfqd->busy_queues); if (bfqd->busy_queues == 0) goto exit; @@ -4236,13 +4233,13 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) rq->rq_flags |= RQF_STARTED; if (bfqq) bfq_log_bfqq(bfqd, bfqq, - "dispatched %s request %p, rq_in_driver %d", + "%s request %p, rq_in_driver %d", bfq_bfqq_sync(bfqq) ? 
"sync" : "async", rq, bfqd->rq_in_driver); else bfq_log(bfqd, - "dispatched request %p from dispatch list, rq_in_driver %d", + "request %p from dispatch list, rq_in_driver %d", rq, bfqd->rq_in_driver); } else bfq_log(bfqd, @@ -4339,7 +4336,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) BUG_ON(bfqq->ref <= 0); if (bfqq->bfqd) - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); bfqq->ref--; if (bfqq->ref) @@ -4383,10 +4380,10 @@ static void bfq_put_queue(struct bfq_queue *bfqq) } if (bfqq->bfqd) - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq); #ifdef BFQ_GROUP_IOSCHED_ENABLED - bfq_log_bfqq(bfqq->bfqd, bfqq, "[%s] putting blkg and bfqg %p\n", __func__, bfqg); + bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg); bfqg_and_blkg_put(bfqg); #endif kmem_cache_free(bfq_pool, bfqq); @@ -4418,7 +4415,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_schedule_dispatch(bfqd); } - bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref); bfq_put_cooperator(bfqq); @@ -4502,7 +4499,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); bfqq->entity.prio_changed = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "set_next_ioprio_data: bic_class %d prio %d class %d", + "bic_class %d prio %d class %d", ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class); } @@ -4529,7 +4526,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); bic_set_bfqq(bic, bfqq, false); bfq_log_bfqq(bfqd, bfqq, - "check_ioprio_change: bfqq %p %d", + "bfqq %p %d", bfqq, bfqq->ref); } @@ -4667,14 +4664,14 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, * guarantee that this queue is not freed * until its group goes away. 
*/ - bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d", bfqq, bfqq->ref); *async_bfqq = bfqq; } out: bfqq->ref++; /* get a process reference to this queue */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref); rcu_read_unlock(); return bfqq; } @@ -4733,7 +4730,7 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle)) has_short_ttime = false; - bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", + bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d", has_short_ttime); if (has_short_ttime) @@ -4759,7 +4756,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_update_io_seektime(bfqd, bfqq, rq); bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: has_short_ttime=%d (seeky %d)", + "has_short_ttime=%d (seeky %d)", bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); @@ -4818,7 +4815,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) assert_spin_locked(&bfqd->lock); - bfq_log_bfqq(bfqd, bfqq, "__insert_req: rq %p bfqq %p", rq, bfqq); + bfq_log_bfqq(bfqd, bfqq, "rq %p bfqq %p", rq, bfqq); /* * An unplug may trigger a requeue of a request from the device @@ -4837,9 +4834,9 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) new_bfqq->allocated++; bfqq->allocated--; bfq_log_bfqq(bfqd, bfqq, - "insert_request: new allocated %d", bfqq->allocated); + "new allocated %d", bfqq->allocated); bfq_log_bfqq(bfqd, new_bfqq, - "insert_request: new_bfqq new allocated %d", + "new_bfqq new allocated %d", bfqq->allocated); new_bfqq->ref++; @@ -4911,11 +4908,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, rq->rq_flags |= RQF_DISP_LIST; if (bfqq) bfq_log_bfqq(bfqd, bfqq, - "insert_request %p in disp: at_head %d", + "%p in disp: at_head %d", rq, at_head); else bfq_log(bfqd, - "insert_request %p in disp: at_head %d", + "%p in disp: at_head %d", rq, at_head); } else { BUG_ON(!(rq->rq_flags & RQF_GOT)); @@ -5033,7 +5030,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfqq->dispatched--; bfq_log_bfqq(bfqd, bfqq, - "completed_requests: new disp %d, new rq_in_driver %d", + "new disp %d, new rq_in_driver %d", bfqq->dispatched, bfqd->rq_in_driver); if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { @@ -5061,7 +5058,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC); bfq_log_bfqq(bfqd, bfqq, - "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", + "delta %uus/%luus max_size %u rate %llu/%llu", delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, delta_us > 0 ? 
(USEC_PER_SEC* @@ -5129,7 +5126,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "put_request_body: allocated %d", bfqq->allocated); + "allocated %d", bfqq->allocated); BUG_ON(!bfqq->allocated); bfqq->allocated--; @@ -5406,10 +5403,10 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) bfqq->allocated++; bfq_log_bfqq(bfqq->bfqd, bfqq, - "get_request: new allocated %d", bfqq->allocated); + "new allocated %d", bfqq->allocated); bfqq->ref++; - bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", rq, bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "%p: bfqq %p, %d", rq, bfqq, bfqq->ref); rq->elv.priv[0] = bic; rq->elv.priv[1] = bfqq; @@ -5493,7 +5490,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) idle_slice_timer); struct bfq_queue *bfqq = bfqd->in_service_queue; - bfq_log(bfqd, "slice_timer expired"); + bfq_log(bfqd, "expired"); /* * Theoretical race here: the in-service queue can be NULL or @@ -5515,10 +5512,10 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, struct bfq_group *root_group = bfqd->root_group; struct bfq_queue *bfqq = *bfqq_ptr; - bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + bfq_log(bfqd, "%p", bfqq); if (bfqq) { bfq_bfqq_move(bfqd, bfqq, root_group); - bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfq_log_bfqq(bfqd, bfqq, "putting %p, %d", bfqq, bfqq->ref); bfq_put_queue(bfqq); *bfqq_ptr = NULL; @@ -5547,7 +5544,7 @@ static void bfq_exit_queue(struct elevator_queue *e) struct bfq_data *bfqd = e->elevator_data; struct bfq_queue *bfqq, *n; - bfq_log(bfqd, "exit_queue: starting ..."); + bfq_log(bfqd, "starting ..."); hrtimer_cancel(&bfqd->idle_slice_timer); @@ -5575,7 +5572,7 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif - bfq_log(bfqd, "exit_queue: finished ..."); + bfq_log(bfqd, "finished ..."); kfree(bfqd); } diff --git a/block/bfq-mq.h b/block/bfq-mq.h index 9a5ce1168ff5..e2ae11bf8f76 100644 --- a/block/bfq-mq.h +++ b/block/bfq-mq.h @@ -712,34 +712,34 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ - pr_crit("%s bfq%d%c %s " fmt "\n", \ + pr_crit("%s bfq%d%c %s [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ (bfqq)->pid, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - bfqq_group(bfqq)->blkg_path, ##args); \ + bfqq_group(bfqq)->blkg_path, __func__, ##args); \ } while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ - pr_crit("%s %s " fmt "\n", \ + pr_crit("%s %s [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ - bfqg->blkg_path, ##args); \ + bfqg->blkg_path, __func__, ##args); \ } while (0) #else /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - pr_crit("%s bfq%d%c " fmt "\n", \ + pr_crit("%s bfq%d%c [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ (bfqq)->pid, bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - ##args) + __func__, ##args) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log(bfqd, fmt, args...) 
\ - pr_crit("%s bfq " fmt "\n", \ + pr_crit("%s bfq [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ - ##args) + __func__, ##args) #else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ @@ -762,28 +762,29 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ - blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \ + blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s [%s] " fmt, \ (bfqq)->pid, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - bfqq_group(bfqq)->blkg_path, ##args); \ + bfqq_group(bfqq)->blkg_path, __func__, ##args); \ } while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ - blk_add_trace_msg((bfqd)->queue, "%s " fmt, bfqg->blkg_path, ##args);\ + blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, bfqg->blkg_path, \ + __func__, ##args);\ } while (0) #else /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ + blk_add_trace_msg((bfqd)->queue, "bfq%d%c [%s] " fmt, (bfqq)->pid, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - ##args) + __func__, ##args) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log(bfqd, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) + blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args) #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ @@ -938,7 +939,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) if (bfqq) bfq_log_bfqq(bfqq->bfqd, bfqq, - "entity_service_tree %p %d", + "%p %d", sched_data->service_tree + idx, idx); #ifdef BFQ_GROUP_IOSCHED_ENABLED else { @@ -946,7 +947,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "entity_service_tree %p %d", + "%p %d", sched_data->service_tree + idx, idx); } #endif diff --git a/block/bfq-sched.c b/block/bfq-sched.c index 4e6c5232e2fb..ead34c30a7c2 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c @@ -119,7 +119,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, if (bfqq) bfq_log_bfqq(bfqq->bfqd, bfqq, - "update_next_in_service: chose without lookup"); + "chose without lookup"); #ifdef BFQ_GROUP_IOSCHED_ENABLED else { struct bfq_group *bfqg = @@ -127,7 +127,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data*)bfqg->bfqd, bfqg, - "update_next_in_service: chose without lookup"); + "chose without lookup"); } #endif } @@ -148,7 +148,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, bfqq = bfq_entity_to_bfqq(next_in_service); if (bfqq) bfq_log_bfqq(bfqq->bfqd, bfqq, - "update_next_in_service: chosen this queue"); + "chosen this queue"); #ifdef BFQ_GROUP_IOSCHED_ENABLED else { struct bfq_group *bfqg = @@ -156,7 +156,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "update_next_in_service: chosen this entity"); + "chosen this entity"); } #endif return parent_sched_may_change; @@ -331,10 +331,10 @@ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) if (bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: serv %lu, w %d", + "serv %lu, w %d", service, entity->weight); bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: start 
%llu, finish %llu, delta %llu", + "start %llu, finish %llu, delta %llu", start, finish, delta); #ifdef BFQ_GROUP_IOSCHED_ENABLED } else { @@ -342,10 +342,10 @@ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "calc_finish group: serv %lu, w %d", + "group: serv %lu, w %d", service, entity->weight); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "calc_finish group: start %llu, finish %llu, delta %llu", + "group: start %llu, finish %llu, delta %llu", start, finish, delta); #endif } @@ -484,7 +484,7 @@ static void bfq_update_active_node(struct rb_node *node) if (bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "update_active_node: new min_start %llu", + "new min_start %llu", ((entity->min_start>>10)*1000)>>12); #ifdef BFQ_GROUP_IOSCHED_ENABLED } else { @@ -492,7 +492,7 @@ static void bfq_update_active_node(struct rb_node *node) container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "update_active_node: new min_start %llu", + "new min_start %llu", ((entity->min_start>>10)*1000)>>12); #endif } @@ -620,7 +620,7 @@ static void bfq_get_entity(struct bfq_entity *entity) if (bfqq) { bfqq->ref++; - bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); } } @@ -748,7 +748,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st, entity->on_st = false; st->wsum -= entity->weight; if (bfqq && !is_in_service) { - bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity (before): %p %d", + bfq_log_bfqq(bfqq->bfqd, bfqq, "(before): %p %d", bfqq, bfqq->ref); bfq_put_queue(bfqq); } @@ -1008,7 +1008,7 @@ static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, tot_serv_to_charge = entity->service; bfq_log_bfqq(bfqq->bfqd, bfqq, - "charge_time: %lu/%u ms, %d/%d/%d sectors", + "%lu/%u ms, %d/%d/%d sectors", time_ms, timeout_ms, entity->service, tot_serv_to_charge, entity->budget); @@ -1080,7 +1080,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, if (bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "update_fin_time_enqueue: new queue finish %llu", + "new queue finish %llu", ((entity->finish>>10)*1000)>>12); #ifdef BFQ_GROUP_IOSCHED_ENABLED } else { @@ -1088,7 +1088,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "update_fin_time_enqueue: new group finish %llu", + "new group finish %llu", ((entity->finish>>10)*1000)>>12); #endif } @@ -1098,7 +1098,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, if (bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "update_fin_time_enqueue: queue %seligible in st %p", + "queue %seligible in st %p", entity->start <= st->vtime ? "" : "non ", st); #ifdef BFQ_GROUP_IOSCHED_ENABLED } else { @@ -1106,7 +1106,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "update_fin_time_enqueue: group %seligible in st %p", + "group %seligible in st %p", entity->start <= st->vtime ? 
"" : "non ", st); #endif } @@ -1550,7 +1550,7 @@ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) if (bfqq) bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_vtime_jump: new value %llu", + "new value %llu", ((root_entity->min_start>>10)*1000)>>12); #ifdef BFQ_GROUP_IOSCHED_ENABLED else { @@ -1559,7 +1559,7 @@ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "calc_vtime_jump: new value %llu", + "new value %llu", ((root_entity->min_start>>10)*1000)>>12); } #endif @@ -1677,7 +1677,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) bfqq = bfq_entity_to_bfqq(entity); if (bfqq) bfq_log_bfqq(bfqq->bfqd, bfqq, - "__lookup_next: start %llu vtime %llu st %p", + "start %llu vtime %llu st %p", ((entity->start>>10)*1000)>>12, ((new_vtime>>10)*1000)>>12, st); #ifdef BFQ_GROUP_IOSCHED_ENABLED @@ -1686,7 +1686,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "__lookup_next: start %llu vtime %llu (%llu) st %p", + "start %llu vtime %llu (%llu) st %p", ((entity->start>>10)*1000)>>12, ((st->vtime>>10)*1000)>>12, ((new_vtime>>10)*1000)>>12, st); @@ -1821,14 +1821,14 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) container_of(entity, struct bfq_group, entity); bfq_log_bfqg(bfqd, bfqg, - "get_next_queue: lookup in this group"); + "lookup in this group"); if (!sd->next_in_service) - pr_crit("get_next_queue: lookup in this group"); + pr_crit("lookup in this group"); } else { bfq_log_bfqg(bfqd, bfqd->root_group, - "get_next_queue: lookup in root group"); + "lookup in root group"); if (!sd->next_in_service) - pr_crit("get_next_queue: lookup in root group"); + pr_crit("lookup in root group"); } #endif @@ -1903,7 +1903,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) bfqq = bfq_entity_to_bfqq(entity); if (bfqq) bfq_log_bfqq(bfqd, bfqq, - "get_next_queue: this queue, finish %llu", + "this queue, finish %llu", (((entity->finish>>10)*1000)>>10)>>2); #ifdef BFQ_GROUP_IOSCHED_ENABLED else { @@ -1911,7 +1911,7 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) container_of(entity, struct bfq_group, entity); bfq_log_bfqg(bfqd, bfqg, - "get_next_queue: this entity, finish %llu", + "this entity, finish %llu", (((entity->finish>>10)*1000)>>10)>>2); } #endif diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index c4df156b1fb4..e49e8ac882b3 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -281,7 +281,7 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd); static void bfq_schedule_dispatch(struct bfq_data *bfqd) { if (bfqd->queued != 0) { - bfq_log(bfqd, "schedule dispatch"); + bfq_log(bfqd, ""); kblockd_schedule_work(&bfqd->unplug_work); } } @@ -414,7 +414,7 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, if (rb_link) *rb_link = p; - bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", + bfq_log(bfqd, "%llu: returning %d", (unsigned long long) sector, bfqq ? 
bfqq->pid : 0); @@ -635,7 +635,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq, if (rq == last || ktime_get_ns() < rq->fifo_time) return NULL; - bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); + bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq); BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); return rq; } @@ -728,7 +728,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, bfq_serv_to_charge(next_rq, bfqq)); if (entity->budget != new_budget) { entity->budget = new_budget; - bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", + bfq_log_bfqq(bfqd, bfqq, "new budget %lu", new_budget); bfq_requeue_bfqq(bfqd, bfqq, false); } @@ -800,8 +800,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); bfq_log_bfqq(bfqq->bfqd, bfqq, - "[%s] bic %p wr_coeff %d start_finish %lu max_time %lu", - __func__, + "bic %p wr_coeff %d start_finish %lu max_time %lu", bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, bfqq->wr_cur_max_time); @@ -814,11 +813,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, bfq_wr_duration(bfqd))) { switch_back_to_interactive_wr(bfqq, bfqd); bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching back to interactive"); + "switching back to interactive"); } else { bfqq->wr_coeff = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching off wr (%lu + %lu < %lu)", + "switching off wr (%lu + %lu < %lu)", bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, jiffies); } @@ -870,7 +869,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* Increment burst size to take into account also bfqq */ bfqd->burst_size++; - bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size); + bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size); BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh); @@ -883,7 +882,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * other to consider this burst as large. */ bfqd->large_burst = true; - bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started"); + bfq_log_bfqq(bfqd, bfqq, "large burst started"); /* * We can now mark all queues in the burst list as @@ -1055,7 +1054,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqd->large_burst = false; bfq_reset_burst_list(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, - "handle_burst: late activation or different group"); + "late activation or different group"); goto end; } @@ -1065,7 +1064,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * bfqq as belonging to this large burst immediately. */ if (bfqd->large_burst) { - bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst"); + bfq_log_bfqq(bfqd, bfqq, "marked in burst"); bfq_mark_bfqq_in_large_burst(bfqq); goto end; } @@ -1572,7 +1571,7 @@ static void bfq_add_request(struct request *rq) unsigned int old_wr_coeff = bfqq->wr_coeff; bool interactive = false; - bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s", + bfq_log_bfqq(bfqd, bfqq, "size %u %s", blk_rq_sectors(rq), rq_is_sync(rq) ? 
"S" : "A"); if (bfqq->wr_coeff > 1) /* queue is being weight-raised */ @@ -1870,10 +1869,10 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) */ bfqq->entity.prio_changed = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "end_wr: wrais ending at %lu, rais_max_time %u", + "wrais ending at %lu, rais_max_time %u", bfqq->last_wr_start_finish, jiffies_to_msecs(bfqq->wr_cur_max_time)); - bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d", + bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d", bfqq->bfqd->wr_busy_queues); } @@ -2048,8 +2047,8 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, { if (bfq_too_late_for_merging(new_bfqq)) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "[%s] too late for bfq%d to be merged", - __func__, new_bfqq->pid); + "too late for bfq%d to be merged", + new_bfqq->pid); return false; } @@ -2258,7 +2257,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, } - bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", + bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d", bfqd->wr_busy_queues); /* @@ -2359,7 +2358,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd, bfqq->budget_timeout = jiffies + bfqd->bfq_timeout * timeout_coeff; - bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", + bfq_log_bfqq(bfqd, bfqq, "%u", jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); } @@ -2427,10 +2426,10 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, bfq_set_budget_timeout(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, - "set_in_service_queue, cur-budget = %d", + "cur-budget = %d", bfqq->entity.budget); } else - bfq_log(bfqd, "set_in_service_queue: NULL"); + bfq_log(bfqd, "NULL"); bfqd->in_service_queue = bfqq; } @@ -2559,7 +2558,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq bfqd->peak_rate_samples = 0; /* full re-init on next disp. 
*/ bfq_log(bfqd, - "reset_rate_computation at end, sample %u/%u tot_sects %llu", + "at end, sample %u/%u tot_sects %llu", bfqd->peak_rate_samples, bfqd->sequential_samples, bfqd->tot_sectors_dispatched); } @@ -2579,7 +2578,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { bfq_log(bfqd, - "update_rate_reset: only resetting, delta_first %lluus samples %d", + "only resetting, delta_first %lluus samples %d", bfqd->delta_from_first>>10, bfqd->peak_rate_samples); goto reset_computation; } @@ -2603,7 +2602,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) div_u64(bfqd->delta_from_first, NSEC_PER_USEC)); bfq_log(bfqd, -"update_rate_reset: tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", +"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), rate > 20<peak_rate) || rate > 20<peak_rate_samples, bfqd->sequential_samples, ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); goto reset_computation; } else { bfq_log(bfqd, - "update_rate_reset: do update, samples %u/%u rate/peak %llu/%llu", + "do update, samples %u/%u rate/peak %llu/%llu", bfqd->peak_rate_samples, bfqd->sequential_samples, ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); @@ -2681,7 +2680,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) rate /= divisor; /* smoothing constant alpha = 1/divisor */ bfq_log(bfqd, - "update_rate_reset: divisor %d tmp_peak_rate %llu tmp_rate %u", + "divisor %d tmp_peak_rate %llu tmp_rate %u", divisor, ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); @@ -2735,7 +2734,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) if (bfqd->peak_rate_samples == 0) { /* first dispatch */ bfq_log(bfqd, - "update_peak_rate: goto reset, samples %d", + "goto reset, samples %d", bfqd->peak_rate_samples) ; bfq_reset_rate_computation(bfqd, rq); goto update_last_values; /* will add one sample */ @@ -2756,7 +2755,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && bfqd->rq_in_driver == 0) { bfq_log(bfqd, -"update_peak_rate: jumping to updating&resetting delta_last %lluus samples %d", +"jumping to updating&resetting delta_last %lluus samples %d", (now_ns - bfqd->last_dispatch)>>10, bfqd->peak_rate_samples) ; goto update_rate_and_reset; @@ -2782,7 +2781,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) bfqd->delta_from_first = now_ns - bfqd->first_dispatch; bfq_log(bfqd, - "update_peak_rate: added samples %u/%u tot_sects %llu delta_first %lluus", + "added samples %u/%u tot_sects %llu delta_first %lluus", bfqd->peak_rate_samples, bfqd->sequential_samples, bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10); @@ -2798,12 +2797,12 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) bfqd->last_dispatch = now_ns; bfq_log(bfqd, - "update_peak_rate: delta_first %lluus last_pos %llu peak_rate %llu", + "delta_first %lluus last_pos %llu peak_rate %llu", (now_ns - bfqd->first_dispatch)>>10, (unsigned long long) bfqd->last_position, ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); bfq_log(bfqd, - 
"update_peak_rate: samples at end %d", bfqd->peak_rate_samples); + "samples at end %d", bfqd->peak_rate_samples); } /* @@ -2900,11 +2899,11 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, */ budget = 2 * min_budget; - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", + bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d", bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", + bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d", budget, bfq_min_budget(bfqd)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d", bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { @@ -3106,7 +3105,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, else /* charge at least one seek */ *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; - bfq_log(bfqd, "bfq_bfqq_is_slow: too short %u", delta_usecs); + bfq_log(bfqd, "too short %u", delta_usecs); return slow; } @@ -3129,11 +3128,11 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, * peak rate. */ slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; - bfq_log(bfqd, "bfq_bfqq_is_slow: relative rate %d/%d", + bfq_log(bfqd, "relative rate %d/%d", bfqq->entity.service, bfqd->bfq_max_budget); } - bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); + bfq_log_bfqq(bfqd, bfqq, "slow %d", slow); return slow; } @@ -3235,7 +3234,7 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) { bfq_log_bfqq(bfqd, bfqq, -"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u", +"service_blkg %lu soft_rate %u sects/sec interval %u", bfqq->service_from_backlogged, bfqd->bfq_wr_max_softrt_rate, jiffies_to_msecs(HZ * bfqq->service_from_backlogged / @@ -3414,7 +3413,7 @@ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "may_budget_timeout: wait_request %d left %d timeout %d", + "wait_request %d left %d timeout %d", bfq_bfqq_wait_request(bfqq), bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, bfq_bfqq_budget_timeout(bfqq)); @@ -3675,11 +3674,11 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * either boosts the throughput (without issues), or is * necessary to preserve service guarantees. 
*/ - bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", + bfq_log_bfqq(bfqd, bfqq, "sync %d idling_boosts_thr %d", bfq_bfqq_sync(bfqq), idling_boosts_thr); bfq_log_bfqq(bfqd, bfqq, - "may_idle: wr_busy %d boosts %d IO-bound %d guar %d", + "wr_busy %d boosts %d IO-bound %d guar %d", bfqd->wr_busy_queues, idling_boosts_thr_without_issues, bfq_bfqq_IO_bound(bfqq), @@ -3719,7 +3718,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) if (!bfqq) goto new_queue; - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + bfq_log_bfqq(bfqd, bfqq, "already in-service queue"); if (bfq_may_expire_for_budg_timeout(bfqq) && !hrtimer_active(&bfqd->idle_slice_timer) && @@ -3797,14 +3796,14 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) new_queue: bfqq = bfq_set_in_service_queue(bfqd); if (bfqq) { - bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); + bfq_log_bfqq(bfqd, bfqq, "checking new queue"); goto check_queue; } keep_queue: if (bfqq) - bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); + bfq_log_bfqq(bfqd, bfqq, "returned this queue"); else - bfq_log(bfqd, "select_queue: no queue returned"); + bfq_log(bfqd, "no queue returned"); return bfqq; } @@ -3857,8 +3856,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* see comments on max_service_from_wr */ bfq_bfqq_end_wr(bfqq); bfq_log_bfqq(bfqd, bfqq, - "[%s] too much service", - __func__); + "too much service"); } } /* @@ -3987,7 +3985,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_queue *bfqq; - bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); + bfq_log(bfqd, "%d busy queues", bfqd->busy_queues); if (bfqd->busy_queues == 0) return 0; @@ -4021,7 +4019,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) if (!bfq_dispatch_request(bfqd, bfqq)) return 0; - bfq_log_bfqq(bfqd, bfqq, "dispatched %s request", + bfq_log_bfqq(bfqd, bfqq, "%s request", bfq_bfqq_sync(bfqq) ? 
"sync" : "async"); BUG_ON(bfqq->next_rq == NULL && @@ -4044,7 +4042,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) BUG_ON(bfqq->ref <= 0); - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); bfqq->ref--; if (bfqq->ref) return; @@ -4086,7 +4084,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->burst_size--; } - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq); kmem_cache_free(bfq_pool, bfqq); #ifdef BFQ_GROUP_IOSCHED_ENABLED @@ -4120,7 +4118,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_schedule_dispatch(bfqd); } - bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref); bfq_put_cooperator(bfqq); @@ -4200,7 +4198,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); bfqq->entity.prio_changed = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "set_next_ioprio_data: bic_class %d prio %d class %d", + "bic_class %d prio %d class %d", ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class); } @@ -4227,7 +4225,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); bic_set_bfqq(bic, bfqq, false); bfq_log_bfqq(bfqd, bfqq, - "check_ioprio_change: bfqq %p %d", + "bfqq %p %d", bfqq, bfqq->ref); } @@ -4362,14 +4360,14 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, * guarantee that this queue is not freed * until its group goes away. */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d", bfqq, bfqq->ref); *async_bfqq = bfqq; } out: bfqq->ref++; /* get a process reference to this queue */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref); rcu_read_unlock(); return bfqq; } @@ -4428,7 +4426,7 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, bic->ttime.ttime_mean > bfqd->bfq_slice_idle)) has_short_ttime = false; - bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", + bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d", has_short_ttime); if (has_short_ttime) @@ -4454,7 +4452,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_update_io_seektime(bfqd, bfqq, rq); bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: has_short_ttime=%d (seeky %d)", + "has_short_ttime=%d (seeky %d)", bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); @@ -4629,7 +4627,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) */ delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC); - bfq_log(bfqd, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu", + bfq_log(bfqd, "delta %uus/%luus max_size %u rate %llu/%llu", delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, delta_us > 0 ? 
(USEC_PER_SEC* @@ -4750,7 +4748,7 @@ static void bfq_put_request(struct request *rq) rq->elv.priv[0] = NULL; rq->elv.priv[1] = NULL; - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p, %d", bfqq, bfqq->ref); bfq_put_queue(bfqq); } @@ -4816,7 +4814,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, bic_set_bfqq(bic, bfqq, is_sync); if (split && is_sync) { bfq_log_bfqq(bfqd, bfqq, - "set_request: was_in_list %d " + "was_in_list %d " "was_in_large_burst %d " "large burst in progress %d", bic->was_in_burst_list, @@ -4826,12 +4824,12 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, if ((bic->was_in_burst_list && bfqd->large_burst) || bic->saved_in_large_burst) { bfq_log_bfqq(bfqd, bfqq, - "set_request: marking in " + "marking in " "large burst"); bfq_mark_bfqq_in_large_burst(bfqq); } else { bfq_log_bfqq(bfqd, bfqq, - "set_request: clearing in " + "clearing in " "large burst"); bfq_clear_bfqq_in_large_burst(bfqq); if (bic->was_in_burst_list) @@ -4888,7 +4886,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, bfqq->allocated[rw]++; bfqq->ref++; - bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "bfqq %p, %d", bfqq, bfqq->ref); rq->elv.priv[0] = bic; rq->elv.priv[1] = bfqq; @@ -4962,7 +4960,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) * case we just expire a queue too early. */ if (bfqq) { - bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); + bfq_log_bfqq(bfqd, bfqq, "expired"); bfq_clear_bfqq_wait_request(bfqq); if (bfq_bfqq_budget_timeout(bfqq)) @@ -5005,10 +5003,10 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, struct bfq_group *root_group = bfqd->root_group; struct bfq_queue *bfqq = *bfqq_ptr; - bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + bfq_log(bfqd, "%p", bfqq); if (bfqq) { bfq_bfqq_move(bfqd, bfqq, root_group); - bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfq_log_bfqq(bfqd, bfqq, "putting %p, %d", bfqq, bfqq->ref); bfq_put_queue(bfqq); *bfqq_ptr = NULL; diff --git a/block/bfq.h b/block/bfq.h index 0cd7a3f251a7..4d2fe7f77af1 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -698,37 +698,37 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); \ assert_spin_locked((bfqd)->queue->queue_lock); \ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ - pr_crit("%s bfq%d%c %s " fmt "\n", \ + pr_crit("%s bfq%d%c %s [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ (bfqq)->pid, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - __pbuf, ##args); \ + __pbuf, __func__, ##args); \ } while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ char __pbuf[128]; \ \ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ - pr_crit("%s %s " fmt "\n", \ + pr_crit("%s %s [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ - __pbuf, ##args); \ + __pbuf, __func__, ##args); \ } while (0) #else /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - pr_crit("%s bfq%d%c " fmt "\n", \ + pr_crit("%s bfq%d%c [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ (bfqq)->pid, bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - ##args) + __func__, ##args) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log(bfqd, fmt, args...) 
\ - pr_crit("%s bfq " fmt "\n", \ + pr_crit("%s bfq [%s] " fmt "\n", \ checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ - ##args) + __func__, ##args) #else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ @@ -755,31 +755,32 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); \ assert_spin_locked((bfqd)->queue->queue_lock); \ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \ + blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s [%s] " fmt, \ (bfqq)->pid, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - __pbuf, ##args); \ + __pbuf, __func__, ##args); \ } while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ char __pbuf[128]; \ \ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \ + blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, __pbuf, \ + __func__, ##args); \ } while (0) #else /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ + blk_add_trace_msg((bfqd)->queue, "bfq%d%c [%s] " fmt, (bfqq)->pid, \ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - ##args) + __func__, ##args) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* BFQ_GROUP_IOSCHED_ENABLED */ #define bfq_log(bfqd, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) + blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args) #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ @@ -928,7 +929,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) if (bfqq) bfq_log_bfqq(bfqq->bfqd, bfqq, - "entity_service_tree %p %d", + "%p %d", sched_data->service_tree + idx, idx); #ifdef BFQ_GROUP_IOSCHED_ENABLED else { @@ -936,7 +937,7 @@ bfq_entity_service_tree(struct bfq_entity *entity) container_of(entity, struct bfq_group, entity); bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, - "entity_service_tree %p %d", + "%p %d", sched_data->service_tree + idx, idx); } #endif From 673a457e8a54d1c4b66e61b1a50956ba0b8c6a60 Mon Sep 17 00:00:00 2001 From: Davide Paganelli Date: Thu, 8 Feb 2018 11:49:58 +0100 Subject: [PATCH 19/23] block, bfq-mq, bfq-sq: make bfq_bfqq_expire print expiration reason Improve readability of the log messages related to the expiration reasons of the function bfq_bfqq_expire. Change the printing of the number that represents the reason for expiration with an actual textual description of the reason. Signed-off-by: Davide Paganelli Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 10 ++++++++-- block/bfq-sq-iosched.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index edc93b6af186..9268dd47a4e5 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -133,6 +133,12 @@ static const int bfq_timeout = (HZ / 8); */ static const unsigned long bfq_merge_time_limit = HZ/10; +#define MAX_LENGTH_REASON_NAME 25 + +static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE", +"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS", +"PREEMPTED"}; + static struct kmem_cache *bfq_pool; /* Below this threshold (in ns), we consider thinktime immediate. 
*/ @@ -3553,8 +3559,8 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, } bfq_log_bfqq(bfqd, bfqq, - "expire (%d, slow %d, num_disp %d, short_ttime %d, weight %d)", - reason, slow, bfqq->dispatched, + "expire (%s, slow %d, num_disp %d, short_ttime %d, weight %d)", + reason_name[reason], slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq), entity->weight); /* diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index e49e8ac882b3..f95deaab49a1 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -127,6 +127,12 @@ static const int bfq_timeout = (HZ / 8); */ static const unsigned long bfq_merge_time_limit = HZ/10; +#define MAX_LENGTH_REASON_NAME 25 + +static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE", +"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS", +"PREEMPTED"}; + static struct kmem_cache *bfq_pool; /* Below this threshold (in ns), we consider thinktime immediate. */ @@ -3366,8 +3372,8 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, } bfq_log_bfqq(bfqd, bfqq, - "expire (%d, slow %d, num_disp %d, short_ttime %d, weight %d)", - reason, slow, bfqq->dispatched, + "expire (%s, slow %d, num_disp %d, short_ttime %d, weight %d)", + reason_name[reason], slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq), entity->weight); /* From 62e80623fbb58367c3f667dab22fea0804001f3b Mon Sep 17 00:00:00 2001 From: Melzani Alessandro Date: Mon, 26 Feb 2018 22:21:59 +0100 Subject: [PATCH 20/23] bfq-mq: port of "block, bfq: remove batches of confusing ifdefs" Commit a33801e8b473 ("block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP") introduced two batches of confusing ifdefs: one reported in [1], plus a similar one in another function. This commit removes both batches, in the way suggested in [1]. [1] https://www.spinics.net/lists/linux-block/msg20043.html Fixes: a33801e8b473 ("block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP") Signed-off-by: Alessandro Melzani --- block/bfq-mq-iosched.c | 128 ++++++++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 55 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 9268dd47a4e5..5a211620f316 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -4256,35 +4256,17 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) return rq; } -static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) -{ - struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; - struct request *rq; -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) - struct bfq_queue *in_serv_queue, *bfqq; - bool waiting_rq, idle_timer_disabled; -#endif - spin_lock_irq(&bfqd->lock); - -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) - in_serv_queue = bfqd->in_service_queue; - waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); - - rq = __bfq_dispatch_request(hctx); - - idle_timer_disabled = - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); - -#else - rq = __bfq_dispatch_request(hctx); -#endif - spin_unlock_irq(&bfqd->lock); +#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) +static void bfq_update_dispatch_stats(struct request_queue *q, + struct request *rq, + struct bfq_queue *in_serv_queue, + bool idle_timer_disabled) +{ + struct bfq_queue *bfqq = rq ? RQ_BFQQ(rq) : NULL; -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) - bfqq = rq ? 
RQ_BFQQ(rq) : NULL; if (!idle_timer_disabled && !bfqq) - return rq; + return; /* * rq and bfqq are guaranteed to exist until this function @@ -4299,7 +4281,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * In addition, the following queue lock guarantees that * bfqq_group(bfqq) exists as well. */ - spin_lock_irq(hctx->queue->queue_lock); + spin_lock_irq(q->queue_lock); if (idle_timer_disabled) /* * Since the idle timer has been disabled, @@ -4318,8 +4300,35 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) bfqg_stats_set_start_empty_time(bfqg); bfqg_stats_update_io_remove(bfqg, rq->cmd_flags); } - spin_unlock_irq(hctx->queue->queue_lock); + spin_unlock_irq(q->queue_lock); +} +#else +static inline void bfq_update_dispatch_stats(struct request_queue *q, + struct request *rq, + struct bfq_queue *in_serv_queue, + bool idle_timer_disabled) {} #endif +static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) +{ + struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + struct request *rq; + struct bfq_queue *in_serv_queue; + bool waiting_rq, idle_timer_disabled; + + spin_lock_irq(&bfqd->lock); + + in_serv_queue = bfqd->in_service_queue; + waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); + + rq = __bfq_dispatch_request(hctx); + + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + + spin_unlock_irq(&bfqd->lock); + + bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, + idle_timer_disabled); return rq; } @@ -4881,6 +4890,38 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) return idle_timer_disabled; } +#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) +static void bfq_update_insert_stats(struct request_queue *q, + struct bfq_queue *bfqq, + bool idle_timer_disabled, + unsigned int cmd_flags) +{ + if (!bfqq) + return; + + /* + * bfqq still exists, because it can disappear only after + * either it is merged with another queue, or the process it + * is associated with exits. But both actions must be taken by + * the same process currently executing this flow of + * instructions. + * + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. 
+ */ + spin_lock_irq(q->queue_lock); + bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); + if (idle_timer_disabled) + bfqg_stats_update_idle_time(bfqq_group(bfqq)); + spin_unlock_irq(q->queue_lock); +} +#else +static inline void bfq_update_insert_stats(struct request_queue *q, + struct bfq_queue *bfqq, + bool idle_timer_disabled, + unsigned int cmd_flags) {} +#endif + static void bfq_prepare_request(struct request *rq, struct bio *bio); static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, @@ -4889,10 +4930,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_queue *bfqq = RQ_BFQQ(rq); -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) bool idle_timer_disabled = false; unsigned int cmd_flags; -#endif spin_lock_irq(&bfqd->lock); if (blk_mq_sched_try_insert_merge(q, rq)) { @@ -4938,7 +4977,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bfqq = RQ_BFQQ(rq); } -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred @@ -4946,9 +4984,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * redirected into a new queue. */ bfqq = RQ_BFQQ(rq); -#else - __bfq_insert_request(bfqd, rq); -#endif if (rq_mergeable(rq)) { elv_rqhash_add(q, rq); @@ -4956,34 +4991,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, q->last_merge = rq; } } -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) + /* * Cache cmd_flags before releasing scheduler lock, because rq * may disappear afterwards (for example, because of a request * merge). */ cmd_flags = rq->cmd_flags; -#endif + spin_unlock_irq(&bfqd->lock); -#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP) - if (!bfqq) - return; - /* - * bfqq still exists, because it can disappear only after - * either it is merged with another queue, or the process it - * is associated with exits. But both actions must be taken by - * the same process currently executing this flow of - * instruction. - * - * In addition, the following queue lock guarantees that - * bfqq_group(bfqq) exists as well. - */ - spin_lock_irq(q->queue_lock); - bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); - if (idle_timer_disabled) - bfqg_stats_update_idle_time(bfqq_group(bfqq)); - spin_unlock_irq(q->queue_lock); -#endif + bfq_update_insert_stats(q, bfqq, idle_timer_disabled, + cmd_flags); } static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, From 0d0d05632872b226f4fae5e56af8736a4c24bf57 Mon Sep 17 00:00:00 2001 From: Melzani Alessandro Date: Mon, 26 Feb 2018 22:43:30 +0100 Subject: [PATCH 21/23] bfq-sq, bfq-mq: port of "bfq: Use icq_to_bic() consistently" Some code uses icq_to_bic() to convert an io_cq pointer to a bfq_io_cq pointer while other code uses a direct cast. Convert the code that uses a direct cast such that it uses icq_to_bic(). 
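As an aside, the sketch below is a reduced, stand-alone illustration of what such a helper buys over the cast; it uses placeholder struct layouts and a simplified container_of() stand-in, and is not the BFQ sources. The direct cast silently relies on the embedded io_cq being the first member of struct bfq_io_cq, while a container_of()-style accessor recovers the enclosing structure from the member's real offset and keeps working if the layout ever changes.

#include <stddef.h>

/* Placeholder layouts, for illustration only */
struct io_cq { int dummy; };

struct bfq_io_cq {
	struct io_cq icq;		/* embedded member (currently first) */
	int saved_has_short_ttime;	/* stand-in for the real fields */
};

/* Simplified stand-in for the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Helper in the spirit of icq_to_bic(): independent of ->icq's position */
static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
{
	return container_of(icq, struct bfq_io_cq, icq);
}

/* Direct cast: correct only while ->icq stays the first member */
static struct bfq_io_cq *icq_to_bic_cast(struct io_cq *icq)
{
	return (struct bfq_io_cq *)icq;
}

int main(void)
{
	struct bfq_io_cq bic = { .saved_has_short_ttime = 1 };

	/* With the current layout, both conversions yield the same pointer */
	return (icq_to_bic(&bic.icq) == &bic &&
		icq_to_bic_cast(&bic.icq) == &bic) ? 0 : 1;
}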
Signed-off-by: Alessandro Melzani --- block/bfq-mq-iosched.c | 2 +- block/bfq-sq-iosched.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 5a211620f316..7b1269558c47 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -272,7 +272,7 @@ static const unsigned long max_service_from_wr = 120000; #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) -#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) +#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) /** diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index f95deaab49a1..c4aff8d55fc4 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -266,7 +266,7 @@ static const unsigned long max_service_from_wr = 120000; #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) -#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) +#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) static void bfq_schedule_dispatch(struct bfq_data *bfqd); From 4cb5de6add7d6ad0d25d73cb95dc871305db1522 Mon Sep 17 00:00:00 2001 From: Melzani Alessandro Date: Mon, 26 Feb 2018 22:59:30 +0100 Subject: [PATCH 22/23] bfq-sq, bfq-mq: port of "block, bfq: fix error handle in bfq_init" If elv_register() fails, bfq_pool should be freed. Signed-off-by: Alessandro Melzani --- block/bfq-mq-iosched.c | 4 +++- block/bfq-sq-iosched.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 7b1269558c47..964e88c2ce59 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -6129,7 +6129,7 @@ static int __init bfq_init(void) ret = elv_register(&iosched_bfq_mq); if (ret) - goto err_pol_unreg; + goto slab_kill; #ifdef BFQ_GROUP_IOSCHED_ENABLED strcat(msg, " (with cgroups support)"); @@ -6138,6 +6138,8 @@ static int __init bfq_init(void) return 0; +slab_kill: + bfq_slab_kill(); err_pol_unreg: #ifdef BFQ_GROUP_IOSCHED_ENABLED blkcg_policy_unregister(&blkcg_policy_bfq); diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c index c4aff8d55fc4..7f0cf1f01ffc 100644 --- a/block/bfq-sq-iosched.c +++ b/block/bfq-sq-iosched.c @@ -5590,7 +5590,7 @@ static int __init bfq_init(void) ret = elv_register(&iosched_bfq); if (ret) - goto err_pol_unreg; + goto slab_kill; #ifdef BFQ_GROUP_IOSCHED_ENABLED strcat(msg, " (with cgroups support)"); @@ -5599,6 +5599,8 @@ static int __init bfq_init(void) return 0; +slab_kill: + bfq_slab_kill(); err_pol_unreg: #ifdef BFQ_GROUP_IOSCHED_ENABLED blkcg_policy_unregister(&blkcg_policy_bfq); From 1f77c173aaa87ffb22c9f062a6449245d14311e4 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 4 Apr 2018 11:28:16 +0200 Subject: [PATCH 23/23] block, bfq-sq, bfq-mq: lower-bound the estimated peak rate to 1 If a storage device handled by BFQ happens to be slower than 7.5 KB/s for a certain amount of time (in the order of a second), then the estimated peak rate of the device, maintained in BFQ, becomes equal to 0. The reason is the limited precision with which the rate is represented (details on the range of representable values in the comments introduced by this commit). This leads to a division-by-zero error where the estimated peak rate is used as a divisor. Such a failure has been reported in [1]. This commit addresses this issue by: 1. Lower-bounding the estimated peak rate to 1 2.
Adding and improving comments on the range of rates representable [1] https://www.spinics.net/lists/kernel/msg2739205.html Signed-off-by: Konstantin Khlebnikov Signed-off-by: Paolo Valente --- block/bfq-mq-iosched.c | 25 ++++++++++++++++++++++++- block/bfq-mq.h | 7 ++++++- block/bfq-sq-iosched.c | 25 ++++++++++++++++++++++++- block/bfq.h | 7 ++++++- 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 964e88c2ce59..03efd90c5d20 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -160,7 +160,20 @@ static struct kmem_cache *bfq_pool; /* Target observation time interval for a peak-rate update (ns) */ #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC -/* Shift used for peak rate fixed precision calculations. */ +/* + * Shift used for peak-rate fixed precision calculations. + * With + * - the current shift: 16 positions + * - the current type used to store rate: u32 + * - the current unit of measure for rate: [sectors/usec], or, more precisely, + * [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift, + * the range of rates that can be stored is + * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec = + * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec = + * [15, 65G] sectors/sec + * Which, assuming a sector size of 512B, corresponds to a range of + * [7.5K, 33T] B/sec + */ #define BFQ_RATE_SHIFT 16 /* @@ -2881,6 +2894,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) BUG_ON(bfqd->peak_rate > 20<peak_rate += rate; + + /* + * For a very slow device, bfqd->peak_rate can reach 0 (see + * the minimum representable values reported in the comments + * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid + * divisions by zero where bfqd->peak_rate is used as a + * divisor. + */ + bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate); + update_thr_responsiveness_params(bfqd); BUG_ON(bfqd->peak_rate > 20<peak_rate > 20<peak_rate += rate; + + /* + * For a very slow device, bfqd->peak_rate can reach 0 (see + * the minimum representable values reported in the comments + * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid + * divisions by zero where bfqd->peak_rate is used as a + * divisor. + */ + bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate); + update_thr_responsiveness_params(bfqd); BUG_ON(bfqd->peak_rate > 20<
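A closing note on the arithmetic in the new BFQ_RATE_SHIFT comment: the stand-alone user-space sketch below is not kernel code; it simply assumes the quantities stated in that comment (rates stored in a u32 as (sectors/usec) << 16, 512-byte sectors) and reproduces the quoted [7.5K, 33T] B/sec bounds.

#include <stdio.h>
#include <stdint.h>

#define BFQ_RATE_SHIFT	16	/* shift assumed from the comment in the patch */
#define SECTOR_BYTES	512.0	/* assumed sector size */

int main(void)
{
	/* Smallest nonzero fixed-point value: 1 / 2^BFQ_RATE_SHIFT sectors/usec */
	double min_sectors_per_usec = 1.0 / (1 << BFQ_RATE_SHIFT);
	/* Largest value a u32 can hold: 2^(32 - BFQ_RATE_SHIFT) sectors/usec */
	double max_sectors_per_usec = (double)(UINT64_C(1) << (32 - BFQ_RATE_SHIFT));

	/* Prints 7812.5 B/s, which the patch comment rounds to "7.5K" */
	printf("min rate: %.1f B/s\n", min_sectors_per_usec * 1e6 * SECTOR_BYTES);
	/* Prints ~3.36e13 B/s, i.e. the patch comment's "33T" */
	printf("max rate: %.3e B/s\n", max_sectors_per_usec * 1e6 * SECTOR_BYTES);
	return 0;
}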