diff options
Diffstat (limited to 'sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch')
-rw-r--r-- | sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch | 1034 |
1 files changed, 1034 insertions, 0 deletions
diff --git a/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch b/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch new file mode 100644 index 00000000..ea585f02 --- /dev/null +++ b/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch @@ -0,0 +1,1034 @@ +From efc499347ea3827417cf00718616bf61a090afec Mon Sep 17 00:00:00 2001 +From: Mauro Andreolini <mauro.andreolini@unimore.it> +Date: Thu, 23 Jan 2014 16:54:44 +0100 +Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7 for + 3.10.0 + +A set of processes may happen to perform interleaved reads, i.e., requests +whose union would give rise to a sequential read pattern. There are two +typical cases: in the first case, processes read fixed-size chunks of +data at a fixed distance from each other, while in the second case processes +may read variable-size chunks at variable distances. The latter case occurs +for example with KVM, which splits the I/O generated by the guest into +multiple chunks, and lets these chunks be served by a pool of cooperating +processes, iteratively assigning the next chunk of I/O to the first +available process. CFQ uses actual queue merging for the first type of +rocesses, whereas it uses preemption to get a sequential read pattern out +of the read requests performed by the second type of processes. In the end +it uses two different mechanisms to achieve the same goal: boosting the +throughput with interleaved I/O. + +This patch introduces Early Queue Merge (EQM), a unified mechanism to get a +sequential read pattern with both types of processes. The main idea is +checking newly arrived requests against the next request of the active queue +both in case of actual request insert and in case of request merge. By doing +so, both the types of processes can be handled by just merging their queues. +EQM is then simpler and more compact than the pair of mechanisms used in +CFQ. + +Finally, EQM also preserves the typical low-latency properties of BFQ, by +properly restoring the weight-raising state of a queue when it gets back to +a non-merged state. + +Signed-off-by: Mauro Andreolini <mauro.andreolini@unimore.it> +Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com> +Reviewed-by: Paolo Valente <paolo.valente@unimore.it> +--- + block/bfq-iosched.c | 657 ++++++++++++++++++++++++++++++++++++---------------- + block/bfq-sched.c | 28 --- + block/bfq.h | 16 ++ + 3 files changed, 474 insertions(+), 227 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 96abb81..99083be6 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -445,6 +445,46 @@ static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) + return dur; + } + ++static inline void ++bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ++{ ++ if (bic->saved_idle_window) ++ bfq_mark_bfqq_idle_window(bfqq); ++ else ++ bfq_clear_bfqq_idle_window(bfqq); ++ if (bic->raising_time_left && bfqq->bfqd->low_latency) { ++ /* ++ * Start a weight raising period with the duration given by ++ * the raising_time_left snapshot. ++ */ ++ if (bfq_bfqq_busy(bfqq)) ++ bfqq->bfqd->raised_busy_queues++; ++ bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff; ++ bfqq->raising_cur_max_time = bic->raising_time_left; ++ bfqq->last_rais_start_finish = jiffies; ++ bfqq->entity.ioprio_changed = 1; ++ } ++ /* ++ * Clear raising_time_left to prevent bfq_bfqq_save_state() from ++ * getting confused about the queue's need of a weight-raising ++ * period. ++ */ ++ bic->raising_time_left = 0; ++} ++ ++/* ++ * Must be called with the queue_lock held. ++ */ ++static int bfqq_process_refs(struct bfq_queue *bfqq) ++{ ++ int process_refs, io_refs; ++ ++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; ++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; ++ BUG_ON(process_refs < 0); ++ return process_refs; ++} ++ + static void bfq_add_rq_rb(struct request *rq) + { + struct bfq_queue *bfqq = RQ_BFQQ(rq); +@@ -486,12 +526,20 @@ static void bfq_add_rq_rb(struct request *rq) + if (!bfqd->low_latency) + goto add_bfqq_busy; + ++ if (bfq_bfqq_just_split(bfqq)) ++ goto set_ioprio_changed; ++ + /* +- * If the queue is not being boosted and has been idle +- * for enough time, start a weight-raising period ++ * If the queue: ++ * - is not being boosted, ++ * - has been idle for enough time, ++ * - is not a sync queue or is linked to a bfq_io_cq (it is ++ * shared "for its nature" or it is not shared and its ++ * requests have not been redirected to a shared queue) ++ * start a weight-raising period. + */ +- if (old_raising_coeff == 1 && +- (idle_for_long_time || soft_rt)) { ++ if (old_raising_coeff == 1 && (idle_for_long_time || soft_rt) && ++ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) { + bfqq->raising_coeff = bfqd->bfq_raising_coeff; + if (idle_for_long_time) + bfqq->raising_cur_max_time = +@@ -572,6 +620,7 @@ static void bfq_add_rq_rb(struct request *rq) + bfqd->bfq_raising_rt_max_time; + } + } ++set_ioprio_changed: + if (old_raising_coeff != bfqq->raising_coeff) + entity->ioprio_changed = 1; + add_bfqq_busy: +@@ -754,90 +803,35 @@ static void bfq_end_raising(struct bfq_data *bfqd) + spin_unlock_irq(bfqd->queue->queue_lock); + } + +-static int bfq_allow_merge(struct request_queue *q, struct request *rq, +- struct bio *bio) +-{ +- struct bfq_data *bfqd = q->elevator->elevator_data; +- struct bfq_io_cq *bic; +- struct bfq_queue *bfqq; +- +- /* +- * Disallow merge of a sync bio into an async request. +- */ +- if (bfq_bio_sync(bio) && !rq_is_sync(rq)) +- return 0; +- +- /* +- * Lookup the bfqq that this bio will be queued with. Allow +- * merge only if rq is queued there. +- * Queue lock is held here. +- */ +- bic = bfq_bic_lookup(bfqd, current->io_context); +- if (bic == NULL) +- return 0; +- +- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); +- return bfqq == RQ_BFQQ(rq); +-} +- +-static void __bfq_set_in_service_queue(struct bfq_data *bfqd, +- struct bfq_queue *bfqq) +-{ +- if (bfqq != NULL) { +- bfq_mark_bfqq_must_alloc(bfqq); +- bfq_mark_bfqq_budget_new(bfqq); +- bfq_clear_bfqq_fifo_expire(bfqq); +- +- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; +- +- bfq_log_bfqq(bfqd, bfqq, +- "set_in_service_queue, cur-budget = %lu", +- bfqq->entity.budget); +- } +- +- bfqd->in_service_queue = bfqq; +-} +- +-/* +- * Get and set a new queue for service. +- */ +-static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd, +- struct bfq_queue *bfqq) ++static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) + { +- if (!bfqq) +- bfqq = bfq_get_next_queue(bfqd); ++ if (request) ++ return blk_rq_pos(io_struct); + else +- bfq_get_next_queue_forced(bfqd, bfqq); +- +- __bfq_set_in_service_queue(bfqd, bfqq); +- return bfqq; ++ return ((struct bio *)io_struct)->bi_sector; + } + +-static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, +- struct request *rq) ++static inline sector_t bfq_dist_from(sector_t pos1, ++ sector_t pos2) + { +- if (blk_rq_pos(rq) >= bfqd->last_position) +- return blk_rq_pos(rq) - bfqd->last_position; ++ if (pos1 >= pos2) ++ return pos1 - pos2; + else +- return bfqd->last_position - blk_rq_pos(rq); ++ return pos2 - pos1; + } + +-/* +- * Return true if bfqq has no request pending and rq is close enough to +- * bfqd->last_position, or if rq is closer to bfqd->last_position than +- * bfqq->next_rq +- */ +-static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) ++static inline int bfq_rq_close_to_sector(void *io_struct, bool request, ++ sector_t sector) + { +- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; ++ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= ++ BFQQ_SEEK_THR; + } + +-static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) ++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) + { + struct rb_root *root = &bfqd->rq_pos_tree; + struct rb_node *parent, *node; + struct bfq_queue *__bfqq; +- sector_t sector = bfqd->last_position; + + if (RB_EMPTY_ROOT(root)) + return NULL; +@@ -856,7 +850,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) + * position). + */ + __bfqq = rb_entry(parent, struct bfq_queue, pos_node); +- if (bfq_rq_close(bfqd, __bfqq->next_rq)) ++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + if (blk_rq_pos(__bfqq->next_rq) < sector) +@@ -867,7 +861,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) + return NULL; + + __bfqq = rb_entry(node, struct bfq_queue, pos_node); +- if (bfq_rq_close(bfqd, __bfqq->next_rq)) ++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + return NULL; +@@ -876,14 +870,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) + /* + * bfqd - obvious + * cur_bfqq - passed in so that we don't decide that the current queue +- * is closely cooperating with itself. +- * +- * We are assuming that cur_bfqq has dispatched at least one request, +- * and that bfqd->last_position reflects a position on the disk associated +- * with the I/O issued by cur_bfqq. ++ * is closely cooperating with itself ++ * sector - used as a reference point to search for a close queue + */ + static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, +- struct bfq_queue *cur_bfqq) ++ struct bfq_queue *cur_bfqq, ++ sector_t sector) + { + struct bfq_queue *bfqq; + +@@ -903,7 +895,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, + * working closely on the same area of the disk. In that case, + * we can group them together and don't waste time idling. + */ +- bfqq = bfqq_close(bfqd); ++ bfqq = bfqq_close(bfqd, sector); + if (bfqq == NULL || bfqq == cur_bfqq) + return NULL; + +@@ -930,6 +922,282 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, + return bfqq; + } + ++static struct bfq_queue * ++bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) ++{ ++ int process_refs, new_process_refs; ++ struct bfq_queue *__bfqq; ++ ++ /* ++ * If there are no process references on the new_bfqq, then it is ++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain ++ * may have dropped their last reference (not just their last process ++ * reference). ++ */ ++ if (!bfqq_process_refs(new_bfqq)) ++ return NULL; ++ ++ /* Avoid a circular list and skip interim queue merges. */ ++ while ((__bfqq = new_bfqq->new_bfqq)) { ++ if (__bfqq == bfqq) ++ return NULL; ++ new_bfqq = __bfqq; ++ } ++ ++ process_refs = bfqq_process_refs(bfqq); ++ new_process_refs = bfqq_process_refs(new_bfqq); ++ /* ++ * If the process for the bfqq has gone away, there is no ++ * sense in merging the queues. ++ */ ++ if (process_refs == 0 || new_process_refs == 0) ++ return NULL; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", ++ new_bfqq->pid); ++ ++ /* ++ * Merging is just a redirection: the requests of the process owning ++ * one of the two queues are redirected to the other queue. The latter ++ * queue, in its turn, is set as shared if this is the first time that ++ * the requests of some process are redirected to it. ++ * ++ * We redirect bfqq to new_bfqq and not the opposite, because we ++ * are in the context of the process owning bfqq, hence we have the ++ * io_cq of this process. So we can immediately configure this io_cq ++ * to redirect the requests of the process to new_bfqq. ++ * ++ * NOTE, even if new_bfqq coincides with the in-service queue, the ++ * io_cq of new_bfqq is not available, because, if the in-service queue ++ * is shared, bfqd->in_service_bic may not point to the io_cq of the ++ * in-service queue. ++ * Redirecting the requests of the process owning bfqq to the currently ++ * in-service queue is in any case the best option, as we feed the ++ * in-service queue with new requests close to the last request served ++ * and, by doing so, hopefully increase the throughput. ++ */ ++ bfqq->new_bfqq = new_bfqq; ++ atomic_add(process_refs, &new_bfqq->ref); ++ return new_bfqq; ++} ++ ++/* ++ * Attempt to schedule a merge of bfqq with the currently in-service queue or ++ * with a close queue among the scheduled queues. ++ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue ++ * structure otherwise. ++ */ ++static struct bfq_queue * ++bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ void *io_struct, bool request) ++{ ++ struct bfq_queue *in_service_bfqq, *new_bfqq; ++ ++ if (bfqq->new_bfqq) ++ return bfqq->new_bfqq; ++ ++ if (!io_struct) ++ return NULL; ++ ++ in_service_bfqq = bfqd->in_service_queue; ++ ++ if (in_service_bfqq == NULL || in_service_bfqq == bfqq || ++ !bfqd->in_service_bic) ++ goto check_scheduled; ++ ++ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq)) ++ goto check_scheduled; ++ ++ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq)) ++ goto check_scheduled; ++ ++ if (in_service_bfqq->entity.parent != bfqq->entity.parent) ++ goto check_scheduled; ++ ++ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && ++ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) { ++ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); ++ if (new_bfqq != NULL) ++ return new_bfqq; /* Merge with the in-service queue */ ++ } ++ ++ /* ++ * Check whether there is a cooperator among currently scheduled ++ * queues. The only thing we need is that the bio/request is not ++ * NULL, as we need it to establish whether a cooperator exists. ++ */ ++check_scheduled: ++ new_bfqq = bfq_close_cooperator(bfqd, bfqq, ++ bfq_io_struct_pos(io_struct, request)); ++ if (new_bfqq) ++ return bfq_setup_merge(bfqq, new_bfqq); ++ ++ return NULL; ++} ++ ++static inline void ++bfq_bfqq_save_state(struct bfq_queue *bfqq) ++{ ++ /* ++ * If bfqq->bic == NULL, the queue is already shared or its requests ++ * have already been redirected to a shared queue; both idle window ++ * and weight raising state have already been saved. Do nothing. ++ */ ++ if (bfqq->bic == NULL) ++ return; ++ if (bfqq->bic->raising_time_left) ++ /* ++ * This is the queue of a just-started process, and would ++ * deserve weight raising: we set raising_time_left to the full ++ * weight-raising duration to trigger weight-raising when and ++ * if the queue is split and the first request of the queue ++ * is enqueued. ++ */ ++ bfqq->bic->raising_time_left = bfq_wrais_duration(bfqq->bfqd); ++ else if (bfqq->raising_coeff > 1) { ++ unsigned long wrais_duration = ++ jiffies - bfqq->last_rais_start_finish; ++ /* ++ * It may happen that a queue's weight raising period lasts ++ * longer than its raising_cur_max_time, as weight raising is ++ * handled only when a request is enqueued or dispatched (it ++ * does not use any timer). If the weight raising period is ++ * about to end, don't save it. ++ */ ++ if (bfqq->raising_cur_max_time <= wrais_duration) ++ bfqq->bic->raising_time_left = 0; ++ else ++ bfqq->bic->raising_time_left = ++ bfqq->raising_cur_max_time - wrais_duration; ++ /* ++ * The bfq_queue is becoming shared or the requests of the ++ * process owning the queue are being redirected to a shared ++ * queue. Stop the weight raising period of the queue, as in ++ * both cases it should not be owned by an interactive or soft ++ * real-time application. ++ */ ++ bfq_bfqq_end_raising(bfqq); ++ } else ++ bfqq->bic->raising_time_left = 0; ++ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); ++} ++ ++static inline void ++bfq_get_bic_reference(struct bfq_queue *bfqq) ++{ ++ /* ++ * If bfqq->bic has a non-NULL value, the bic to which it belongs ++ * is about to begin using a shared bfq_queue. ++ */ ++ if (bfqq->bic) ++ atomic_long_inc(&bfqq->bic->icq.ioc->refcount); ++} ++ ++static void ++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, ++ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) ++{ ++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", ++ (long unsigned)new_bfqq->pid); ++ /* Save weight raising and idle window of the merged queues */ ++ bfq_bfqq_save_state(bfqq); ++ bfq_bfqq_save_state(new_bfqq); ++ /* ++ * Grab a reference to the bic, to prevent it from being destroyed ++ * before being possibly touched by a bfq_split_bfqq(). ++ */ ++ bfq_get_bic_reference(bfqq); ++ bfq_get_bic_reference(new_bfqq); ++ /* Merge queues (that is, let bic redirect its requests to new_bfqq) */ ++ bic_set_bfqq(bic, new_bfqq, 1); ++ bfq_mark_bfqq_coop(new_bfqq); ++ /* ++ * new_bfqq now belongs to at least two bics (it is a shared queue): set ++ * new_bfqq->bic to NULL. bfqq either: ++ * - does not belong to any bic any more, and hence bfqq->bic must ++ * be set to NULL, or ++ * - is a queue whose owning bics have already been redirected to a ++ * different queue, hence the queue is destined to not belong to any ++ * bic soon and bfqq->bic is already NULL (therefore the next ++ * assignment causes no harm). ++ */ ++ new_bfqq->bic = NULL; ++ bfqq->bic = NULL; ++ bfq_put_queue(bfqq); ++} ++ ++static int bfq_allow_merge(struct request_queue *q, struct request *rq, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq, *new_bfqq; ++ ++ /* ++ * Disallow merge of a sync bio into an async request. ++ */ ++ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) ++ return 0; ++ ++ /* ++ * Lookup the bfqq that this bio will be queued with. Allow ++ * merge only if rq is queued there. ++ * Queue lock is held here. ++ */ ++ bic = bfq_bic_lookup(bfqd, current->io_context); ++ if (bic == NULL) ++ return 0; ++ ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); ++ /* ++ * We take advantage of this function to perform an early merge ++ * of the queues of possible cooperating processes. ++ */ ++ if (bfqq != NULL) { ++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); ++ if (new_bfqq != NULL) { ++ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); ++ /* ++ * If we get here, the bio will be queued in the shared queue, ++ * i.e., new_bfqq, so use new_bfqq to decide whether bio and ++ * rq can be merged. ++ */ ++ bfqq = new_bfqq; ++ } ++ } ++ ++ return bfqq == RQ_BFQQ(rq); ++} ++ ++static void __bfq_set_in_service_queue(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ if (bfqq != NULL) { ++ bfq_mark_bfqq_must_alloc(bfqq); ++ bfq_mark_bfqq_budget_new(bfqq); ++ bfq_clear_bfqq_fifo_expire(bfqq); ++ ++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "set_in_service_queue, cur-budget = %lu", ++ bfqq->entity.budget); ++ } ++ ++ bfqd->in_service_queue = bfqq; ++} ++ ++/* ++ * Get and set a new queue for service. ++ */ ++static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); ++ ++ __bfq_set_in_service_queue(bfqd, bfqq); ++ return bfqq; ++} ++ + /* + * If enough samples have been computed, return the current max budget + * stored in bfqd, which is dynamically updated according to the +@@ -1077,63 +1345,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) + return rq; + } + +-/* +- * Must be called with the queue_lock held. +- */ +-static int bfqq_process_refs(struct bfq_queue *bfqq) +-{ +- int process_refs, io_refs; +- +- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; +- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; +- BUG_ON(process_refs < 0); +- return process_refs; +-} +- +-static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +-{ +- int process_refs, new_process_refs; +- struct bfq_queue *__bfqq; +- +- /* +- * If there are no process references on the new_bfqq, then it is +- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain +- * may have dropped their last reference (not just their last process +- * reference). +- */ +- if (!bfqq_process_refs(new_bfqq)) +- return; +- +- /* Avoid a circular list and skip interim queue merges. */ +- while ((__bfqq = new_bfqq->new_bfqq)) { +- if (__bfqq == bfqq) +- return; +- new_bfqq = __bfqq; +- } +- +- process_refs = bfqq_process_refs(bfqq); +- new_process_refs = bfqq_process_refs(new_bfqq); +- /* +- * If the process for the bfqq has gone away, there is no +- * sense in merging the queues. +- */ +- if (process_refs == 0 || new_process_refs == 0) +- return; +- +- /* +- * Merge in the direction of the lesser amount of work. +- */ +- if (new_process_refs >= process_refs) { +- bfqq->new_bfqq = new_bfqq; +- atomic_add(process_refs, &new_bfqq->ref); +- } else { +- new_bfqq->new_bfqq = bfqq; +- atomic_add(new_process_refs, &bfqq->ref); +- } +- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", +- new_bfqq->pid); +-} +- + static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) + { + struct bfq_entity *entity = &bfqq->entity; +@@ -1703,7 +1914,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) + */ + static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + { +- struct bfq_queue *bfqq, *new_bfqq = NULL; ++ struct bfq_queue *bfqq; + struct request *next_rq; + enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; + +@@ -1713,17 +1924,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + + bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + +- /* +- * If another queue has a request waiting within our mean seek +- * distance, let it run. The expire code will check for close +- * cooperators and put the close queue at the front of the +- * service tree. If possible, merge the expiring queue with the +- * new bfqq. +- */ +- new_bfqq = bfq_close_cooperator(bfqd, bfqq); +- if (new_bfqq != NULL && bfqq->new_bfqq == NULL) +- bfq_setup_merge(bfqq, new_bfqq); +- + if (bfq_may_expire_for_budg_timeout(bfqq) && + !timer_pending(&bfqd->idle_slice_timer) && + !bfq_bfqq_must_idle(bfqq)) +@@ -1760,36 +1960,26 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + } +- if (new_bfqq == NULL) +- goto keep_queue; +- else +- goto expire; ++ goto keep_queue; + } + } + + /* +- * No requests pending. If the in-service queue has no cooperator and +- * still has requests in flight (possibly waiting for a completion) +- * or is idling for a new request, then keep it. ++ * No requests pending. If the in-service queue still has requests in ++ * flight (possibly waiting for a completion) or is idling for a new ++ * request, then keep it. + */ +- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || +- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) { ++ if (timer_pending(&bfqd->idle_slice_timer) || ++ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { + bfqq = NULL; + goto keep_queue; +- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { +- /* +- * Expiring the queue because there is a close cooperator, +- * cancel timer. +- */ +- bfq_clear_bfqq_wait_request(bfqq); +- del_timer(&bfqd->idle_slice_timer); + } + + reason = BFQ_BFQQ_NO_MORE_REQUESTS; + expire: + bfq_bfqq_expire(bfqd, bfqq, 0, reason); + new_queue: +- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq); ++ bfqq = bfq_set_in_service_queue(bfqd); + bfq_log(bfqd, "select_queue: new queue %d returned", + bfqq != NULL ? bfqq->pid : 0); + keep_queue: +@@ -1799,9 +1989,8 @@ keep_queue: + static void bfq_update_raising_data(struct bfq_data *bfqd, + struct bfq_queue *bfqq) + { ++ struct bfq_entity *entity = &bfqq->entity; + if (bfqq->raising_coeff > 1) { /* queue is being boosted */ +- struct bfq_entity *entity = &bfqq->entity; +- + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, " + "old raising coeff %u, w %d(%d)", +@@ -1818,7 +2007,7 @@ static void bfq_update_raising_data(struct bfq_data *bfqd, + "WARN: pending prio change"); + /* + * If too much time has elapsed from the beginning +- * of this weight-raising, stop it. ++ * of this weight-raising period, stop it. + */ + if (jiffies - bfqq->last_rais_start_finish > + bfqq->raising_cur_max_time) { +@@ -1830,11 +2019,13 @@ static void bfq_update_raising_data(struct bfq_data *bfqd, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + bfq_bfqq_end_raising(bfqq); +- __bfq_entity_update_weight_prio( +- bfq_entity_service_tree(entity), +- entity); + } + } ++ /* Update weight both if it must be raised and if it must be lowered */ ++ if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1)) ++ __bfq_entity_update_weight_prio( ++ bfq_entity_service_tree(entity), ++ entity); + } + + /* +@@ -2075,6 +2266,25 @@ static void bfq_init_icq(struct io_cq *icq) + struct bfq_io_cq *bic = icq_to_bic(icq); + + bic->ttime.last_end_request = jiffies; ++ /* ++ * A newly created bic indicates that the process has just ++ * started doing I/O, and is probably mapping into memory its ++ * executable and libraries: it definitely needs weight raising. ++ * There is however the possibility that the process performs, ++ * for a while, I/O close to some other process. EQM intercepts ++ * this behavior and may merge the queue corresponding to the ++ * process with some other queue, BEFORE the weight of the queue ++ * is raised. Merged queues are not weight-raised (they are assumed ++ * to belong to processes that benefit only from high throughput). ++ * If the merge is basically the consequence of an accident, then ++ * the queue will be split soon and will get back its old weight. ++ * It is then important to write down somewhere that this queue ++ * does need weight raising, even if it did not make it to get its ++ * weight raised before being merged. To this purpose, we overload ++ * the field raising_time_left and assign 1 to it, to mark the queue ++ * as needing weight raising. ++ */ ++ bic->raising_time_left = 1; + } + + static void bfq_exit_icq(struct io_cq *icq) +@@ -2088,6 +2298,13 @@ static void bfq_exit_icq(struct io_cq *icq) + } + + if (bic->bfqq[BLK_RW_SYNC]) { ++ /* ++ * If the bic is using a shared queue, put the reference ++ * taken on the io_context when the bic started using a ++ * shared bfq_queue. ++ */ ++ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC])) ++ put_io_context(icq->ioc); + bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); + bic->bfqq[BLK_RW_SYNC] = NULL; + } +@@ -2375,6 +2592,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, + if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) + return; + ++ /* Idle window just restored, statistics are meaningless. */ ++ if (bfq_bfqq_just_split(bfqq)) ++ return; ++ + enable_idle = bfq_bfqq_idle_window(bfqq); + + if (atomic_read(&bic->icq.ioc->active_ref) == 0 || +@@ -2415,6 +2636,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || + !BFQQ_SEEKY(bfqq)) + bfq_update_idle_window(bfqd, bfqq, bic); ++ bfq_clear_bfqq_just_split(bfqq); + + bfq_log_bfqq(bfqd, bfqq, + "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", +@@ -2475,13 +2697,48 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + static void bfq_insert_request(struct request_queue *q, struct request *rq) + { + struct bfq_data *bfqd = q->elevator->elevator_data; +- struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; + + assert_spin_locked(bfqd->queue->queue_lock); ++ ++ /* ++ * An unplug may trigger a requeue of a request from the device ++ * driver: make sure we are in process context while trying to ++ * merge two bfq_queues. ++ */ ++ if (!in_interrupt()) { ++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); ++ if (new_bfqq != NULL) { ++ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) ++ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1); ++ /* ++ * Release the request's reference to the old bfqq ++ * and make sure one is taken to the shared queue. ++ */ ++ new_bfqq->allocated[rq_data_dir(rq)]++; ++ bfqq->allocated[rq_data_dir(rq)]--; ++ atomic_inc(&new_bfqq->ref); ++ bfq_put_queue(bfqq); ++ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) ++ bfq_merge_bfqqs(bfqd, RQ_BIC(rq), ++ bfqq, new_bfqq); ++ rq->elv.priv[1] = new_bfqq; ++ bfqq = new_bfqq; ++ } ++ } ++ + bfq_init_prio_data(bfqq, RQ_BIC(rq)); + + bfq_add_rq_rb(rq); + ++ /* ++ * Here a newly-created bfq_queue has already started a weight-raising ++ * period: clear raising_time_left to prevent bfq_bfqq_save_state() ++ * from assigning it a full weight-raising period. See the detailed ++ * comments about this field in bfq_init_icq(). ++ */ ++ if (bfqq->bic != NULL) ++ bfqq->bic->raising_time_left = 0; + rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); + list_add_tail(&rq->queuelist, &bfqq->fifo); + +@@ -2629,18 +2886,6 @@ static void bfq_put_request(struct request *rq) + } + } + +-static struct bfq_queue * +-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, +- struct bfq_queue *bfqq) +-{ +- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", +- (long unsigned)bfqq->new_bfqq->pid); +- bic_set_bfqq(bic, bfqq->new_bfqq, 1); +- bfq_mark_bfqq_coop(bfqq->new_bfqq); +- bfq_put_queue(bfqq); +- return bic_to_bfqq(bic, 1); +-} +- + /* + * Returns NULL if a new bfqq should be allocated, or the old bfqq if this + * was the last process referring to said bfqq. +@@ -2649,6 +2894,9 @@ static struct bfq_queue * + bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) + { + bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); ++ ++ put_io_context(bic->icq.ioc); ++ + if (bfqq_process_refs(bfqq) == 1) { + bfqq->pid = current->pid; + bfq_clear_bfqq_coop(bfqq); +@@ -2677,6 +2925,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, + struct bfq_queue *bfqq; + struct bfq_group *bfqg; + unsigned long flags; ++ bool split = false; + + might_sleep_if(gfp_mask & __GFP_WAIT); + +@@ -2695,24 +2944,14 @@ new_queue: + bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); + bic_set_bfqq(bic, bfqq, is_sync); + } else { +- /* +- * If the queue was seeky for too long, break it apart. +- */ ++ /* If the queue was seeky for too long, break it apart. */ + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); + bfqq = bfq_split_bfqq(bic, bfqq); ++ split = true; + if (!bfqq) + goto new_queue; + } +- +- /* +- * Check to see if this queue is scheduled to merge with +- * another closely cooperating queue. The merging of queues +- * happens here as it must be done in process context. +- * The reference on new_bfqq was taken in merge_bfqqs. +- */ +- if (bfqq->new_bfqq != NULL) +- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); + } + + bfqq->allocated[rw]++; +@@ -2723,6 +2962,26 @@ new_queue: + rq->elv.priv[0] = bic; + rq->elv.priv[1] = bfqq; + ++ /* ++ * If a bfq_queue has only one process reference, it is owned ++ * by only one bfq_io_cq: we can set the bic field of the ++ * bfq_queue to the address of that structure. Also, if the ++ * queue has just been split, mark a flag so that the ++ * information is available to the other scheduler hooks. ++ */ ++ if (bfqq_process_refs(bfqq) == 1) { ++ bfqq->bic = bic; ++ if (split) { ++ bfq_mark_bfqq_just_split(bfqq); ++ /* ++ * If the queue has just been split from a shared queue, ++ * restore the idle window and the possible weight ++ * raising period. ++ */ ++ bfq_bfqq_resume_state(bfqq, bic); ++ } ++ } ++ + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; +diff --git a/block/bfq-sched.c b/block/bfq-sched.c +index 30df81c..47e66a8 100644 +--- a/block/bfq-sched.c ++++ b/block/bfq-sched.c +@@ -979,34 +979,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) + return bfqq; + } + +-/* +- * Forced extraction of the given queue. +- */ +-static void bfq_get_next_queue_forced(struct bfq_data *bfqd, +- struct bfq_queue *bfqq) +-{ +- struct bfq_entity *entity; +- struct bfq_sched_data *sd; +- +- BUG_ON(bfqd->in_service_queue != NULL); +- +- entity = &bfqq->entity; +- /* +- * Bubble up extraction/update from the leaf to the root. +- */ +- for_each_entity(entity) { +- sd = entity->sched_data; +- bfq_update_budget(entity); +- bfq_update_vtime(bfq_entity_service_tree(entity)); +- bfq_active_extract(bfq_entity_service_tree(entity), entity); +- sd->active_entity = entity; +- sd->next_active = NULL; +- entity->service = 0; +- } +- +- return; +-} +- + static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) + { + if (bfqd->in_service_bic != NULL) { +diff --git a/block/bfq.h b/block/bfq.h +index 78da7d2..b6ebc1d 100644 +--- a/block/bfq.h ++++ b/block/bfq.h +@@ -192,6 +192,8 @@ struct bfq_group; + * idle to backlogged + * @service_from_backlogged: cumulative service received from the @bfq_queue + * since the last transition from idle to backlogged ++ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the ++ * queue is shared + * + * A bfq_queue is a leaf request queue; it can be associated to an io_context + * or more (if it is an async one). @cgroup holds a reference to the +@@ -235,6 +237,7 @@ struct bfq_queue { + sector_t last_request_pos; + + pid_t pid; ++ struct bfq_io_cq *bic; + + /* weight-raising fields */ + unsigned int raising_cur_max_time; +@@ -264,12 +267,23 @@ struct bfq_ttime { + * @icq: associated io_cq structure + * @bfqq: array of two process queues, the sync and the async + * @ttime: associated @bfq_ttime struct ++ * @raising_time_left: snapshot of the time left before weight raising ends ++ * for the sync queue associated to this process; this ++ * snapshot is taken to remember this value while the weight ++ * raising is suspended because the queue is merged with a ++ * shared queue, and is used to set @raising_cur_max_time ++ * when the queue is split from the shared queue and its ++ * weight is raised again ++ * @saved_idle_window: same purpose as the previous field for the idle window + */ + struct bfq_io_cq { + struct io_cq icq; /* must be the first member */ + struct bfq_queue *bfqq[2]; + struct bfq_ttime ttime; + int ioprio; ++ ++ unsigned int raising_time_left; ++ unsigned int saved_idle_window; + }; + + /** +@@ -411,6 +425,7 @@ enum bfqq_state_flags { + BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ + BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ + BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ ++ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ + BFQ_BFQQ_FLAG_softrt_update, /* needs softrt-next-start update */ + }; + +@@ -438,6 +453,7 @@ BFQ_BFQQ_FNS(sync); + BFQ_BFQQ_FNS(budget_new); + BFQ_BFQQ_FNS(coop); + BFQ_BFQQ_FNS(split_coop); ++BFQ_BFQQ_FNS(just_split); + BFQ_BFQQ_FNS(softrt_update); + #undef BFQ_BFQQ_FNS + +-- +1.8.5.2 + |