mpath porting --- diff/drivers/md/Kconfig 2003-11-26 10:18:32.000000000 +0000 +++ source/drivers/md/Kconfig 2003-11-26 10:21:02.000000000 +0000 @@ -156,5 +156,11 @@ Allow volume managers to mirror logical volumes, also needed for live data migration tools such as 'pvmove'. +config DM_MULTIPATH + tristate "Multipath target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Allow volume managers to support multipath hardware. + endmenu --- diff/drivers/md/Makefile 2003-11-26 10:19:39.000000000 +0000 +++ source/drivers/md/Makefile 2003-11-26 10:21:02.000000000 +0000 @@ -9,6 +9,9 @@ dm-mirror-objs := dm-log.o dm-raid1.o +dm-multipath-objs := dm-path-selector.o dm-null-ps.o \ + dm-latency-ps.o dm-mpath.o + # Note: link order is important. All raid personalities # and xor.o must come before md.o, as they each initialise # themselves, and md.o may use the personalities when it @@ -23,3 +26,4 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o +obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o --- diff/drivers/md/dm-daemon.c 2003-11-26 10:18:32.000000000 +0000 +++ source/drivers/md/dm-daemon.c 2003-11-26 10:21:02.000000000 +0000 @@ -59,7 +59,7 @@ return 0; } -int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void)) +int dm_daemon_start(struct dm_daemon *dd, const char *name, jiffy_t (*fn)(void)) { pid_t pid = 0; --- diff/drivers/md/dm-daemon.h 2003-11-26 10:20:45.000000000 +0000 +++ source/drivers/md/dm-daemon.h 2003-11-26 10:21:02.000000000 +0000 @@ -25,7 +25,7 @@ wait_queue_head_t job_queue; }; -int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void)); +int dm_daemon_start(struct dm_daemon *dd, const char *name, jiffy_t (*fn)(void)); void dm_daemon_stop(struct dm_daemon *dd); void dm_daemon_wake(struct dm_daemon *dd); int dm_daemon_running(struct dm_daemon *dd); --- diff/drivers/md/dm-latency-ps.c 2003-11-26 10:20:56.000000000 +0000 +++ source/drivers/md/dm-latency-ps.c 2003-11-26 10:21:02.000000000 +0000 @@ -18,35 +18,94 @@ #include "dm-path-selector.h" #include +#include -/* Path selector context */ -struct latency_c { - struct list_head paths; /* List of operational paths */ - struct list_head failed_paths; /* List of failed paths */ +/* Specific info about a path needed by this selector */ +struct path_info { + struct list_head list; + struct path *path; spinlock_t lock; - int prio_group; /* Actual priority group to select - path from */ + int valid; + unsigned priority; + sector_t io_min; + + sector_t io_count; + unsigned long long latency; + + /* used to calculate the average */ + unsigned latency_count; + jiffy_t latency_tot; }; -/* Path info */ -struct path_c { - struct list_head list; /* Linked list to latency_c */ +/* + * We need to record the io start time and path for every io :( + */ +struct io { + jiffy_t start; + struct path_info *pi; +}; +/* Path selector context */ +struct latency_c { spinlock_t lock; - struct path *path; /* Opaque pointer to caller path info */ - struct latency_c *lc; /* Back pointer to latency context */ + struct path_info *last_path; + struct list_head valid_paths; + struct list_head invalid_paths; + + /* + * FIXME: not wired these up yet. + */ + kmem_cache_t *io_cache; + mempool_t *io_pool; +}; - /* Set by add_path() arguments */ - int priority; - int queue_min; +/*----------------------------------------------------------------- + * The path lists are maintained in priority order (low to high). 
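+ * path_insert() adds a new entry in front of the first element with an
+ * equal or higher priority value; path_ordered() is a temporary debug
+ * check of that invariant (see the FIXME at its call site).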
+ *---------------------------------------------------------------*/ +static void path_ordered(struct list_head *head, struct path_info *pi) +{ + struct path_info *cursor; + unsigned last = 0; + int seen = 0; - /* Internal use */ - int io_count; - unsigned long long latency; -}; + list_for_each_entry (cursor, head, list) { + BUG_ON (cursor->priority < last); + + last = cursor->priority; + if (cursor == pi) + seen = 1; + } + + BUG_ON(!seen); +} + +static void path_insert(struct list_head *head, struct path_info *pi) +{ + struct path_info *cursor; + + list_for_each_entry (cursor, head, list) + if (cursor->priority >= pi->priority) + break; + + list_add_tail(&pi->list, &cursor->list); + + /* FIXME: remove debug later */ + path_ordered(head, pi); +} + +static struct path_info *path_lookup(struct list_head *head, struct path *p) +{ + struct path_info *pi; + + list_for_each_entry (pi, head, list) + if (pi->path == p) + return pi; + + return NULL; +} /* Allocate latency context */ static struct latency_c *alloc_latency_c(void) @@ -54,76 +113,58 @@ struct latency_c *lc = kmalloc(sizeof(*lc), GFP_KERNEL); if (lc) { - INIT_LIST_HEAD(&lc->paths); - INIT_LIST_HEAD(&lc->failed_paths); lc->lock = SPIN_LOCK_UNLOCKED; - lc->prio_group = -1; + lc->last_path = NULL; + INIT_LIST_HEAD(&lc->valid_paths); + INIT_LIST_HEAD(&lc->invalid_paths); } return lc; } /* Allocate path context */ -static struct path_c *alloc_path_c(void) +/* FIXME: remove this ? */ +static struct path_info *alloc_path_info(void) { - struct path_c *pc = kmalloc(sizeof(*pc), GFP_KERNEL); + struct path_info *pi = kmalloc(sizeof(*pi), GFP_KERNEL); - if (pc) { - memset(pc, 0, sizeof(*pc)); - pc->lock = SPIN_LOCK_UNLOCKED; - } + if (pi) + memset(pi, 0, sizeof(*pi)); - return pc; + return pi; } /* Path selector constructor */ -static int latency_ctr(struct path_selector *ps, - int argc, char **argv, char **error) +static int latency_ctr(struct path_selector *ps) { struct latency_c *lc; - if (argc) { - *error = "latency path selector: Invalid number " - "of arguments"; - return -EINVAL; - } - lc = alloc_latency_c(); - if (!lc) { - *error = "latency path selector: Error allocating context"; + if (!lc) return -ENOMEM; - } - ps->context = (void *) lc; + ps->context = lc; return 0; } +static void free_paths(struct list_head *paths) +{ + struct path_info *pi, *next; + + list_for_each_entry_safe(pi, next, paths, list) { + list_del(&pi->list); + kfree(pi); + } +} + /* Path selector destructor */ static void latency_dtr(struct path_selector *ps) { struct latency_c *lc = (struct latency_c *) ps->context; - struct list_head *lists[] = { - &lc->paths, - &lc->failed_paths, - }; - int i = ARRAY_SIZE(lists); - - spin_lock(&lc->lock); - while (i--) { - struct list_head *elem, *tmp; - - list_for_each_safe(elem, tmp, lists[i]) { - struct path_c *pc = - list_entry(elem, struct path_c, list); - - list_del(elem); - kfree(pc); - } - } - spin_unlock(&lc->lock); + free_paths(&lc->valid_paths); + free_paths(&lc->invalid_paths); kfree(lc); - ps->context = NULL; } /* Path add */ @@ -134,129 +175,139 @@ tmp < c ## _MIN || \ tmp > c ## _MAX) { \ *error = "latency path selector: Invalid " s; \ - return NULL; \ + return -EINVAL; \ } \ v = tmp; \ } #define PRIORITY_MIN 0 -#define PRIORITY_MAX 1024*1024 -#define QUEUE_MIN 0 -#define QUEUE_MAX 1024*1024 -static void *latency_add_path(struct path_selector *ps, struct path *path, - int argc, char **argv, char **error) +#define PRIORITY_MAX (1024 * 1024) +#define IO_MIN 0 +#define IO_MAX (1024 * 1024) +static int 
latency_add_path(struct path_selector *ps, struct path *path, + int argc, char **argv, char **error) { struct latency_c *lc = (struct latency_c *) ps->context; - struct path_c *pc; + struct path_info *pi; if (argc != 2) { *error = "latency path selector: Invalid number of arguments"; - return NULL; + return -EINVAL; } - pc = alloc_path_c(); - if (!pc) { + pi = alloc_path_info(); + if (!pi) { *error = "latency path selector: Error allocating path context"; - return NULL; + return -EINVAL; } - pc->path = path; - pc->lc = lc; - xx(0, "priority", PRIORITY, pc->priority); - xx(1, "queue min", QUEUE, pc->queue_min); - pc->io_count = pc->queue_min; + pi->path = path; + xx(0, "priority", PRIORITY, pi->priority); + xx(1, "io min", IO, pi->io_min); + pi->io_count = pi->io_min; + spin_lock(&lc->lock); - list_add_tail(&pc->list, &lc->paths); + list_add_tail(&pi->list, &lc->valid_paths); spin_unlock(&lc->lock); - return (void *) pc; + return 0; } #undef xx /* Path set state */ -static void latency_set_path_state(void *ps_private, unsigned long state) +static void latency_set_path_state(struct path_selector *ps, + struct path *p, int valid) { unsigned long flags; - struct path_c *path = (struct path_c *) ps_private; - struct latency_c *lc = path->lc; + struct latency_c *lc = (struct latency_c *) ps->context; + struct path_info *pi; spin_lock_irqsave(&lc->lock, flags); - /* Fail path */ - if (state) - list_move_tail(&path->list, &lc->failed_paths); + + pi = path_lookup(&lc->valid_paths, p); + if (!pi) + pi = path_lookup(&lc->invalid_paths, p); + + if (!pi) + DMWARN("unknown path"); + else { - list_move_tail(&path->list, &lc->paths); - list_for_each_entry(path, &lc->paths, list) - path->latency = 0; + list_del(&pi->list); + if (valid) { + path_insert(&lc->valid_paths, pi); + pi->latency = 0; + } else + list_add(&pi->list, &lc->invalid_paths); } + spin_unlock_irqrestore(&lc->lock, flags); } /* Helper function path selector */ -static struct path_c *_path(struct latency_c *lc) +static struct path_info *__select_path(struct latency_c *lc) { - struct path_c *path, *high_path = NULL, *ret = NULL; - int high_prio = INT_MAX; + struct path_info *pi, *best = NULL, *last = lc->last_path; unsigned long long latency = ~0ULL; + unsigned priority = ~0; - /* Any operational paths ? */ - list_for_each_entry(path, &lc->paths, list) { - /* Find path with highest riority */ - if (high_prio > path->priority) { - high_prio = path->priority; - high_path = path; - } - - /* Skip paths which aren't members of this priority group */ - if (path->priority != lc->prio_group) - continue; - - /* Ensure minimum IO queue */ - if (path->io_count) { - path->io_count--; - ret = path; + /* + * We only change paths if enough io has gone through. + */ + if (last) { + /* FIXME: add locking around path->valid ? */ + if (last->valid && last->io_count < last->io_min) + return last; + + /* recalculate the latency for last */ + last->latency = last->latency_tot / last->latency_count; + last->latency_count = 0; + last->latency_tot = 0; + } + + /* + * Choose a new path. The highest priority group will be + * at the start of the valid list. 
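+	 * The scan below stops as soon as the priority rises above that
+	 * of the first valid entry; within that group the path with the
+	 * lowest recorded latency wins.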
+ */ + list_for_each_entry (pi, &lc->valid_paths, list) { + if (pi->priority > priority) break; - } - /* Select path with less/equal latency */ - if (path->latency <= latency) { - latency = path->latency; - ret = path; - break; + priority = pi->priority; + if (pi->latency < latency) { + best = pi; + latency = pi->latency; } } - /* There's still at least one with this group priority */ - if (ret) { - if (!path->io_count) - path->io_count = path->queue_min; - /* None with this group priority available, - try another priority group */ - } else if (high_path) { - lc->prio_group = high_prio; - ret = high_path; - } + if (best) + best->io_count = 0; - return ret; + lc->last_path = best; + return best; } /* Path selector */ static struct path *latency_select_path(struct path_selector *ps, - struct buffer_head *bh, int rw, - struct path_info *path_context) + struct bio *bio, + union map_info *info) { - unsigned long flags; struct latency_c *lc = (struct latency_c *) ps->context; - struct path_c *path; + unsigned long flags; + struct path_info *pi; + struct io *io; spin_lock_irqsave(&lc->lock, flags); - path = _path(lc); + pi = __select_path(lc); spin_unlock_irqrestore(&lc->lock, flags); - if (path) { - path_context->ll = jiffies; - path_context->ptr = (void *) path; - return path->path; /* Return opaque caller path */ + if (pi) { + pi->io_count += to_sector(bio->bi_size); + + io = mempool_alloc(lc->io_pool, GFP_NOIO); + io->start = jiffies; + io->pi = pi; + info->ptr = io; + return pi->path; } return NULL; @@ -264,42 +315,56 @@ /* Path end IO */ static void latency_endio(struct path_selector *ps, - struct buffer_head *bh, - int rw, int error, - struct path_info *path_context) + struct bio *bio, int error, + union map_info *info) { + struct latency_c *lc = (struct latency_c *) ps->context; unsigned long flags; jiffy_t j; - struct path_c *path = path_context->ptr; + struct io *io = (struct io *) info->ptr; + struct path_info *pi = io->pi; - BUG_ON(!path); + j = jiffies - io->start; - j = jiffies - path_context->ll; + spin_lock_irqsave(&pi->lock, flags); + pi->latency_tot += j; + pi->latency_count++; + spin_unlock_irqrestore(&pi->lock, flags); - /* Put heavy weight on long latencies */ - j *= j; - j *= j; - - spin_lock_irqsave(&path->lock, flags); - path->latency += j; - spin_unlock_irqrestore(&path->lock, flags); + mempool_free(io, lc->io_pool); } /* Path status */ -static int latency_status(void *context, status_type_t type, - char *result, unsigned int maxlen) +static int latency_status(struct path_selector *ps, + struct path *path, + status_type_t type, + char *result, unsigned int maxlen) { - struct path_c *path = - (struct path_c *) context; + struct latency_c *lc = (struct latency_c *) ps->context; + unsigned long flags; + struct path_info *pi; + + spin_lock_irqsave(&lc->lock, flags); + + pi = path_lookup(&lc->valid_paths, path); + if (!pi) + pi = path_lookup(&lc->invalid_paths, path); + + spin_unlock_irqrestore(&lc->lock, flags); + + if (!pi) { + DMWARN("unknown path"); + return -EINVAL; + } switch(type) { case STATUSTYPE_INFO: - snprintf(result, maxlen, "%llu ", path->latency); + snprintf(result, maxlen, "%llu ", pi->latency); break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%d %d ", - path->priority, path->queue_min); + snprintf(result, maxlen, "%u " SECTOR_FORMAT " ", + pi->priority, pi->io_min); break; } --- diff/drivers/md/dm-mpath.c 2003-11-26 10:20:56.000000000 +0000 +++ source/drivers/md/dm-mpath.c 2003-11-26 10:21:02.000000000 +0000 @@ -11,6 +11,7 @@ #include "dm.h" #include 
"dm-daemon.h" +#include "dm-path-selector.h" #include #include @@ -20,355 +21,574 @@ #include #include #include -#include "dm-path-selector.h" -/* Multipath context */ -struct multipath_c { +/* Path properties */ +struct path { struct list_head list; - struct list_head paths; /* List of paths */ - struct list_head io_jobs; /* IO jobs */ - - struct path_selector ps; - struct dm_target *ti; + struct dm_dev *dev; + int fail_limit; - spinlock_t lock; /* Lock access to this context */ - atomic_t io_count; /* IOs in flight for this context */ + int has_failed; + jiffy_t fail_time; + atomic_t fail_count; + atomic_t fail_total; - unsigned int scrub_interval; /* Set in constructor */ + unsigned failback_interval; - atomic_t events; /* # if table events to throw */ - atomic_t suspended; /* device suspension */ + sector_t test_sector; }; -/* Multipath io job */ -struct path; -struct multipath_io { +/* Multipath context */ +struct multipath { struct list_head list; + struct dm_target *ti; + struct path_selector ps; - struct multipath_c *mc; - struct path *path; - - struct buffer_head *bh; - int rw; - - /* Path selector context between ps->type->select_path() - and ps->type->endio() */ - struct path_info path_context; -}; + struct list_head paths; + spinlock_t failed_lock; + struct bio *failed_ios; -/* Path flags */ -enum { - FAILED, - SCRUB_IO, + unsigned test_interval; +// atomic_t suspended; /* device suspension */ +// int throw_event; }; -/* Path properties */ -struct path { - struct list_head list; - - struct dm_dev *dev; - struct multipath_c *mc; /* Back pointer to multipath context */ - - unsigned long flags; /* See path flags above */ - - /* set in target ctr */ - int reactivation_interval; /* Automatic reactivation interval */ - int fail_max; /* Maximum failures allowed */ - - jiffy_t io_jiffies; /* Jiffies of last IO queued */ - atomic_t fail; /* actual failure count vs. fail_max */ - atomic_t fail_total; /* Total failures on this path */ - - void *ps_private; /* Opaque pointer to path selector object */ - unsigned long test_sector; /* Path scrubbing sector */ -}; +static struct path *alloc_path(void) +{ + struct path *path = kmalloc(sizeof(*path), GFP_KERNEL); -/* - * Various functions to set a single/all path(s) (in)operational, - * check if path(s) is/are operational and (un)fail a path, allocate - * and deallocate io job memory... 
- */ + if (path) { + memset(path, 0, sizeof(*path)); + atomic_set(&path->fail_count, 0); + atomic_set(&path->fail_total, 0); + } -/* Set/Rretrieve jiffies of last IO on this path */ -static inline void set_io_jiffies(struct path *path) -{ - path->io_jiffies = jiffies; + return path; } -static inline jiffy_t get_io_jiffies(struct path *path) +static inline void free_path(struct path *p) { - return path->io_jiffies; + kfree(p); } -/* "Queue" an event on a table in order to process - dm_table_event() calls in task context */ -static inline void queue_table_event(struct multipath_io *io) +static struct multipath *alloc_multipath(void) { - struct multipath_c *mc = (struct multipath_c *) io->mc; + struct multipath *m; - atomic_inc(&mc->events); -} + m = kmalloc(sizeof(*m), GFP_KERNEL); + if (m) { + memset(m, 0, sizeof(*m)); + INIT_LIST_HEAD(&m->paths); + } -/* Check path failed */ -static inline int is_failed(struct path *path) -{ - return test_bit(FAILED, &path->flags); + return m; } -/* Set a path to "failed" */ -static inline void set_failed(struct multipath_io *io) +static void free_multipath(struct multipath *m) { - struct path *path = io->path; - struct path_selector *ps = &path->mc->ps; + struct path_selector *ps; + struct path *path, *tmp; - if (is_failed(path)) + if (!m) return; - atomic_inc(&path->fail_total); - io->path->test_sector = io->bh->b_rsector; - ps->type->set_path_state(path->ps_private, 1); - queue_table_event(io); + ps = &m->ps; + + if (ps) { + ps->type->dtr(ps); + dm_put_path_selector(ps->type); + } + + list_for_each_entry_safe (path, tmp, &m->paths, list) { + list_del(&path->list); + dm_put_device(m->ti, path->dev); + free_path(path); + } + + kfree(m); } -/* Reset failure information on a path */ -static inline void reset_failures(struct path *path) +/*----------------------------------------------------------------- + * The multipath daemon is responsible for resubmitting failed ios. 
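+ * Failed bios are parked on m->failed_ios (chained through bi_next)
+ * and replayed from process context via generic_make_request().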
+ *---------------------------------------------------------------*/ +static struct dm_daemon _kmpathd; + +static LIST_HEAD(_mpaths); +static spinlock_t _mpath_lock = SPIN_LOCK_UNLOCKED; + +static void dispatch_failed_ios(struct multipath *m) { - struct path_selector *ps = &path->mc->ps; + unsigned long flags; + struct bio *bio, *next_bio; - path->test_sector = 0; - atomic_set(&path->fail, path->fail_max); - clear_bit(FAILED, &path->flags); - ps->type->set_path_state(path->ps_private, 0); + spin_lock_irqsave(&m->failed_lock, flags); + bio = m->failed_ios; + m->failed_ios = NULL; + spin_unlock_irqrestore(&m->failed_lock, flags); + + while (bio) { + next_bio = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next_bio; + } } -/* Reset a "failed" path - * (IOW: set it to operational so that it can be selected for IO submission) - */ -static inline void reset_failed(struct multipath_io *io) +/* Requeue error ios */ +static void do_ios(void) { - struct path *path = io->path; + struct multipath *m; - if (_is_failed(path)) { - reset_failures(path); - queue_table_event(io); - } + spin_lock(&_mpath_lock); + list_for_each_entry (m, &_mpaths, list) + dispatch_failed_ios(m); + spin_unlock(&_mpath_lock); + + blk_run_queues(); } -/* Scrub IO handling */ -static inline void reset_scrub_io(struct path *path) +/* Multipathd does this every time it runs, returns a sleep duration hint */ +static jiffy_t do_work(void) { - clear_bit(SCRUB_IO, &path->flags); + do_ios(); +// do_table_events(); +// return do_scrubbing(); + return 0; } +/*----------------------------------------------------------------- + * Constructor/argument parsing + * + * + * [ + * {num_ps_parms} + * ]{nr path parms} + *---------------------------------------------------------------*/ +struct param { + unsigned min; + unsigned max; + char *error; +}; -/* Scrub timeout calculation */ -static inline unsigned long get_reactivation_timeout(struct path *path) +#define ESTR(s) ("dm-multipath: " s) + +static int read_param(struct param *param, char *str, unsigned *v, char **error) { - return path->reactivation_interval * HZ; + if ((sscanf(str, "%u", v) != 1) || + (*v < param->min) || + (*v > param->max)) { + *error = param->error; + return -EINVAL; + } + + return 0; } -static inline unsigned long get_scrub_timeout(struct path *path) +static int parse_path(struct multipath *m, int argc, char **argv, struct dm_target *ti) { - return path->mc->scrub_interval * HZ; + /* path parameters */ + static struct param _params[] = { + {0, 1024, ESTR("invalid path reactivation interval")}, + {0, 1024, ESTR("invalid io failures")} + }; + + + int r; + struct path *p; + + p = alloc_path(); + if (!p) + return -ENOMEM; + + r = dm_get_device(ti, argv[0], ti->begin, ti->len, + dm_table_get_mode(ti->table), &p->dev); + if (r) { + ti->error = "dm-multipath: error getting device"; + goto bad; + } + + r = read_param(_params, argv[1], &p->failback_interval, &ti->error); + if (r) + goto bad; + + r = read_param(_params + 1, argv[2], &p->fail_limit, &ti->error); + if (r) + goto bad; + + r = m->ps.type->add_path(&m->ps, p, argc - 2, argv + 2, &ti->error); + if (r) + goto bad; + + list_add_tail(&p->list, &m->paths); + return 0; + + bad: + free_path(p); + return r; } -/* Calculate scrubbing sleep timeout for deamon */ -static inline int scrub_timeout(struct path *path, long *timeout) +#define MIN_PARMS 5 +static int multipath_ctr(struct dm_target *ti, unsigned int argc, + char **argv) { - int ret = 0; - jiffy_t j = get_io_jiffies(path); - jiffy_t t = 
is_failed(path) ? get_reactivation_timeout(path) : - get_scrub_timeout(path); + /* target parameters */ + static struct param _params[] = { + {2, 1024, ESTR("invalid number of paths")}, + {2, 32, ESTR("invalid number of path parameters")}, + {1, 24*60*60, ESTR("invalid path test interval")}, + {0, 1024, ESTR("invalid path selector parameters")} + }; - if (t) { - /* Jiffies wrap around check */ - if (jiffies < j) { - *timeout = HZ; - set_io_jiffies(path); - return 1; - } + int r, i; + struct multipath *m; + struct path_selector_type *pst; + unsigned nr_paths, nr_params, nr_ps_params; - j += t; - if (jiffies < j) - j -= jiffies; - else { - j = t; - ret = 1; + /* Check minimum argument count */ + if (argc < MIN_PARMS) { + ti->error = ESTR("not enough arguments"); + return -EINVAL; + } + + m = alloc_multipath(); + if (!m) { + ti->error = ESTR("can't allocate multipath context"); + return -EINVAL; + } + + r = read_param(_params, argv[0], &nr_paths, &ti->error); + if (r) + goto bad; + + /* there must be at least 2 paths */ + if (nr_paths < 2) { + ti->error = ESTR("not enough paths"); + goto bad; + } + + r = read_param(_params + 1, argv[1], &nr_params, &ti->error); + if (r) + goto bad; + + if (nr_params != 2) { + ti->error = ESTR("invalid number of path args"); + goto bad; + } + + r = read_param(_params + 2, argv[2], &m->test_interval, &ti->error); + if (r) + goto bad; + + pst = dm_get_path_selector(argv[3]); + if (!pst) { + ti->error = ESTR("unknown path selector type"); + goto bad; + } + + r = pst->ctr(&m->ps); + if (r) { + /* FIXME: put the pst ? */ + goto bad; + } + + r = read_param(_params + 3, argv[4], &nr_ps_params, &ti->error); + if (r) + goto bad; + + /* Loop through all paths parsing their parameters */ + argc -= 5; argv += 5; + nr_params += nr_ps_params + 1; + for (i = 0; i < nr_paths; i++) { + + if (argc < nr_params) { + ti->error = ESTR("insufficient arguments"); + goto bad; } - if (*timeout > j) - *timeout = (long) j; + r = parse_path(m, nr_params, argv, ti); + if (r) + goto bad; + + argc -= nr_params; argv += nr_params; } - return ret; + ti->private = m; + m->ti = ti; + + spin_lock(&_mpath_lock); + list_add(&_mpaths, &m->list); + spin_unlock(&_mpath_lock); + + return 0; + + bad: + free_multipath(m); + return -EINVAL; } -/* Push a job onto the tail of a job queue */ -static inline void push(struct list_head *joblist, - struct list_head *job, - spinlock_t *lock) +/* Destruct a multipath mapping */ +static void multipath_dtr(struct dm_target *ti) { - unsigned long flags; + struct multipath *m = (struct multipath *) ti->private; - spin_lock_irqsave(lock, flags); - list_add_tail(job, joblist); - spin_unlock_irqrestore(lock, flags); +// wait_for_scrub_ios(m); + spin_lock(&_mpath_lock); + list_del(&m->list); + spin_unlock(&_mpath_lock); + + free_multipath(m); } -/* Pop an IO job off a job queue */ -static inline struct multipath_io *pop(struct list_head *jobs, - spinlock_t *lock) +/* Set a path to "failed" */ +static inline void set_failed(struct path_selector *ps, struct path *path, sector_t sector) { - unsigned long flags; - struct multipath_io *io; + if (path->has_failed) + return; - spin_lock_irqsave(lock, flags); - if (list_empty(jobs)) - io = NULL; - else { - io = list_entry(jobs->next, struct multipath_io, list); - list_del(jobs->next); - } - spin_unlock_irqrestore(lock, flags); + /* FIXME: need locking ? 
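+	 * fail_time, test_sector and the selector state are all
+	 * updated below without one.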
*/ + path->fail_time = jiffies; + atomic_inc(&path->fail_total); + path->test_sector = sector; + ps->type->set_path_state(ps, path, 0); +// queue_table_event(io); +} - return io; +/* + * Only called on the slow, error path. + */ +static struct path *find_path(struct multipath *m, struct block_device *bdev) +{ + struct path *p; + + list_for_each_entry(p, &m->paths, list) + if (p->dev->bdev == bdev) + return p; + + return NULL; } +static int multipath_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct multipath *m = (struct multipath *) ti->private; + struct path_selector *ps = &m->ps; + struct path_selector_type *pst = ps->type; + ps_endio_fn endio = pst->endio; + unsigned long flags; + + if (error) { + struct path *path = find_path(m, bio->bi_bdev); -/*----------------------------------------------------------------- - * IO job allocation/deallocation - *---------------------------------------------------------------*/ + if (atomic_dec_and_test(&path->fail_count)) + set_failed(ps, path, bio->bi_sector); -/* Slab for the io jobs */ -static kmem_cache_t *_multipath_cache; -mempool_t *_multipath_pool; + /* choose a new path */ + path = pst->select_path(ps, bio, map_context); + if (path) { + bio->bi_bdev = path->dev->bdev; + spin_lock_irqsave(&m->failed_lock, flags); + bio->bi_next = m->failed_ios; + m->failed_ios = bio; + spin_unlock_irqrestore(&m->failed_lock, flags); -static int ios = 0; -#define DEFAULT_IOS 256 -#define MIN_IOS 16 -#define MAX_IOS 32768 /* maximum on 32 bit hw with mempool_create */ + dm_daemon_wake(&_kmpathd); + return 1; /* io not complete */ + } + } + + if (endio) + endio(ps, bio, error, map_context); + + return 0; /* io complete */ +} -static inline struct multipath_io *alloc_io(void) +/* Suspend */ +static void multipath_suspend(struct dm_target *ti) { - return mempool_alloc(_multipath_pool, GFP_NOIO); + struct multipath *m = (struct multipath *) ti->private; + + //atomic_set(&m->suspended, 1); + //wait_for_scrub_ios(m); } -static inline void free_io(struct multipath_io *io) +/* Resume */ +static void multipath_resume(struct dm_target *ti) { - mempool_free(io, _multipath_pool); + struct multipath *m = (struct multipath *) ti->private; + + //atomic_set(&m->suspended, 0); + dm_daemon_wake(&_kmpathd); } -/* Multipath context allocation */ -static inline struct multipath_c *alloc_context(void) +/* Multipath mapping */ +static int multipath_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) { - struct multipath_c *mc = kmalloc(sizeof(*mc), GFP_KERNEL); + struct multipath *m = (struct multipath *) ti->private; + struct path *path; - if (mc) { - memset(mc, 0, sizeof(*mc)); - INIT_LIST_HEAD(&mc->io_jobs); - INIT_LIST_HEAD(&mc->paths); - mc->lock = SPIN_LOCK_UNLOCKED; - atomic_set(&mc->io_count, 0); - atomic_set(&mc->events, 0); - atomic_set(&mc->suspended, 0); - } + /* Ask path selector for a path */ + path = m->ps.type->select_path(&m->ps, bio, map_context); + if (!path) + return -1; /* No valid path found */ - return mc; + bio->bi_bdev = path->dev->bdev; + return 1; } -/* Path context allocation */ -static inline struct path *alloc_path(void) +/* Multipath status */ +static int multipath_status(struct dm_target *ti, status_type_t type, + char *result, unsigned int maxlen) { - struct path *path = kmalloc(sizeof(*path), GFP_KERNEL); + return 0; +} - if (path) { - memset(path, 0, sizeof(*path)); - atomic_set(&path->fail_total, 0); +/*----------------------------------------------------------------- + * 
Module setup + *---------------------------------------------------------------*/ +static struct target_type multipath_target = { + .name = "multipath", + .module = THIS_MODULE, + .ctr = multipath_ctr, + .dtr = multipath_dtr, + .map = multipath_map, + .end_io = multipath_end_io, + .suspend = multipath_suspend, + .resume = multipath_resume, + .status = multipath_status, +}; + +int __init dm_multipath_init(void) +{ + int r; + + r = dm_register_target(&multipath_target); + if (r < 0) { + DMERR("%s: register failed %d", multipath_target.name, r); + return -EINVAL; } - return path; + r = dm_register_path_selectors(); + if (r && r != -EEXIST) { + dm_unregister_target(&multipath_target); + return r; + } + + r = dm_daemon_start(&_kmpathd, "kpathd", do_work); + if (r) { + dm_unregister_path_selectors(); + dm_unregister_target(&multipath_target); + } else + DMINFO("dm_multipath v0.2.0"); + + return r; } -static void free_context(struct multipath_c *mc) +void __exit dm_multipath_exit(void) { - struct list_head *elem, *tmp; - struct path_selector *ps = &mc->ps; + int r; - if (!mc) - return; + dm_daemon_stop(&_kmpathd); + dm_unregister_path_selectors(); + r = dm_unregister_target(&multipath_target); + if (r < 0) + DMERR("%s: target unregister failed %d", + multipath_target.name, r); +} + +/* Module hooks */ +module_init(dm_multipath_init); +module_exit(dm_multipath_exit); + +MODULE_DESCRIPTION(DM_NAME " multipath target"); +MODULE_AUTHOR("Heinz Mauelshagen "); +MODULE_LICENSE("GPL"); - ps->type->dtr(ps); - dm_put_path_selector(ps->type); - list_for_each_safe(elem, tmp, &mc->paths) { - struct path *path = list_entry(elem, struct path, list); - list_del(elem); - if (path->dev) - dm_put_device(mc->ti, path->dev); - kfree(path); - } - kfree(mc); -} -/*----------------------------------------------------------------- - * The multipath daemon is responsible for periodically - * retestings failed paths and resubmitting failed ios. 
- *---------------------------------------------------------------*/ -static struct dm_daemon _kmultipathd; -static LIST_HEAD(_mc_jobs); -static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; -/* Submit an IO and store the IO timestamp */ -static inline void make_request(struct multipath_io *io) +#ifdef SCRUB_STUFF +/* Reset failure information on a path */ +static inline void reset_failures(struct path *path) { - set_io_jiffies(io->path); - generic_make_request(io->rw, io->bh); + struct path_selector *ps = &path->m->ps; + + path->test_sector = 0; + atomic_set(&path->fail, path->fail_limit); + clear_bit(FAILED, &path->flags); + ps->type->set_path_state(path->ps_private, 0); } -/* Requeue error ios */ -static inline void do_ios(void) +/* Reset a "failed" path + * (IOW: set it to operational so that it can be selected for IO submission) + */ +static void reset_failed(struct multipath_io *io) { - unsigned long flags; - struct multipath_c *mc; - struct multipath_io *io; + struct path *path = io->path; - spin_lock_irqsave(&_job_lock, flags); - list_for_each_entry(mc, &_mc_jobs, list) { - while ((io = pop(&mc->io_jobs, &mc->lock))) - make_request(io); + if (is_failed(path)) { + reset_failures(path); + queue_table_event(io); } - spin_unlock_irqrestore(&_job_lock, flags); +} - run_task_queue(&tq_disk); +/* Scrub IO handling */ +static inline void reset_scrub_io(struct path *path) +{ + clear_bit(SCRUB_IO, &path->flags); } -/* Work all table events thrown */ -static inline void do_table_events(void) + +/* Scrub timeout calculation */ +static inline unsigned long get_reactivation_timeout(struct path *path) { - unsigned long flags; - struct multipath_c *mc; + return path->reactivation_interval * HZ; +} - /* FIXME: optimize this in case no events need to be thrown - (which is most of the time) */ - spin_lock_irqsave(&_job_lock, flags); - list_for_each_entry(mc, &_mc_jobs, list) { - /* Throw all events queued */ - while (atomic_read(&mc->events)) { - dm_table_event(mc->ti->table); - atomic_dec(&mc->events); +static inline unsigned long get_scrub_timeout(struct path *path) +{ + return path->m->scrub_interval * HZ; +} + +/* Calculate scrubbing sleep timeout for deamon */ +static int scrub_timeout(struct path *path, long *timeout) +{ + int ret = 0; + jiffy_t j = get_fail_time(path); + jiffy_t t = is_failed(path) ? 
get_reactivation_timeout(path) : + get_scrub_timeout(path); + + if (t) { + /* Jiffies wrap around check */ + if (jiffies < j) { + *timeout = HZ; + set_fail_time(path); + return 1; + } + + j += t; + if (jiffies < j) + j -= jiffies; + else { + j = t; + ret = 1; } + + if (*timeout > j) + *timeout = (long) j; } - spin_unlock_irqrestore(&_job_lock, flags); + + return ret; } /* Allocate a scrubing IO buffer_head and page */ -static inline struct buffer_head *alloc_scrub_bh(void) +static struct buffer_head *alloc_scrub_bh(void) { struct buffer_head *bh = kmalloc(sizeof(*bh), GFP_NOIO); @@ -393,7 +613,7 @@ } /* Free a scrubing IO page and buffer_head */ -static inline void free_scrub_bh(struct buffer_head *bh) +static void free_scrub_bh(struct buffer_head *bh) { UnlockPage(bh->b_page); __free_page(bh->b_page); @@ -403,22 +623,20 @@ /* Scrubbing end IO function */ static void multipath_scrub_end_io(struct buffer_head *bh, int uptodate) { - struct multipath_io *io = (struct multipath_io *) bh->b_private; - struct multipath_c *mc = (struct multipath_c *) io->mc; + struct multipath *m = (struct multipath *) io->m; if (uptodate) { unsigned long flags; - spin_lock_irqsave(&mc->lock, flags); + spin_lock_irqsave(&m->lock, flags); reset_failed(io); - spin_unlock_irqrestore(&mc->lock, flags); + spin_unlock_irqrestore(&m->lock, flags); dm_daemon_wake(&_kmultipathd); } reset_scrub_io(io->path); free_scrub_bh(io->bh); - free_io(io); } /* @@ -430,7 +648,7 @@ * 1: scrub IO queued * */ -static inline int queue_scrub_io(struct path *path) +static int queue_scrub_io(struct path *path) { struct multipath_io *io; struct buffer_head *bh; @@ -442,16 +660,10 @@ if (!bh) goto retry; /* just retry later */ - /* Setup io */ - io = alloc_io(); - - io->mc = path->mc; - io->path = path; - io->bh = bh; - io->rw = READ; - - /* no need to set b_dev, b_blocknr, b_count - or initialize the wait queue here */ + /* + * No need to set b_dev, b_blocknr, b_count or initialize + * the wait queue here. + */ bh->b_rdev = path->dev->dev; bh->b_rsector = path->test_sector; bh->b_end_io = multipath_scrub_end_io; @@ -473,341 +685,105 @@ * Check if paths need to get a test io queued either for * automatic failure recovery or scrubbing of idle paths. 
*/ -static inline long do_scrubbing(void) +static long do_scrubbing(void) { unsigned long flags; long timeout = MAX_SCHEDULE_TIMEOUT; - struct multipath_c *mc; + struct multipath *m; /* FIXME: optimize this in case no scrubbing is needed */ - spin_lock_irqsave(&_job_lock, flags); - list_for_each_entry(mc, &_mc_jobs, list) { + spin_lock_irqsave(&_mpath_lock, flags); + list_for_each_entry (m, &_mpaths, list) { struct path *path; - /* Don't scrub suspended mcs */ - if (atomic_read(&mc->suspended)) + /* Don't scrub suspended ms */ + if (atomic_read(&m->suspended)) continue; - list_for_each_entry(path, &mc->paths, list) { + list_for_each_entry (path, &m->paths, list) { if (scrub_timeout(path, &timeout)) queue_scrub_io(path); } } - spin_unlock_irqrestore(&_job_lock, flags); + spin_unlock_irqrestore(&_mpath_lock, flags); return timeout; } -/* Multipathd does this every time it runs, returns a sleep duration hint */ -static inline jiffy_t do_work(void) +static void wait_for_scrub_ios(struct multipath *m) { - do_ios(); - do_table_events(); - return do_scrubbing(); -} - - -/*----------------------------------------------------------------- - * Constructor/argument parsing - *---------------------------------------------------------------*/ - -#define ARG_FORMAT "%d" - -/* range checks for target definition in _get_path() */ -#define PARM_MIN 0 /* mininum parameters */ -#define PARM_MAX 1024 /* maximum " */ - -#define PATH_PARM_MIN 2 /* min path parameters */ -#define PATH_PARM_MAX 2 /* max " */ - -#define SCRUB_MIN 1 /* min scrubbing interval in seconds */ -#define SCRUB_MAX 24*60*60 /* max " */ - -/* Path flags */ -#define PATHS_MIN 2 /* min number of paths */ -#define PATHS_MAX 1024 /* max " */ - -#define xx(av, a, s, c, v) \ - if (sscanf(av[a], ARG_FORMAT, &tmp) != 1 || \ - tmp < c ## _MIN || \ - tmp > c ## _MAX) { \ - _free_context(mc); \ - ti->error = "dm-multipath: Invalid " s; \ - return -EINVAL; \ - } \ - v = tmp; - -/* - * Parse a - * - * - * - * [ - * - * {2,num_paths}] - * - * parameter set and construct a multipath context - * - */ -#define MIN_PARMS 5 -static int multipath_ctr(struct dm_target *ti, unsigned int argc, - char **argv) -{ - int a, parms, paths, path_parms, scrub_interval, ps_parms, tmp; - char **av; - struct multipath_c *mc = NULL; - struct path_selector_type *pst; struct path *path; - if (argc < MIN_PARMS) /* Check minimum argument count */ - goto bad_parms; - - xx(argv, 0, "number of paths", PATHS, paths); - if (paths < 2) - goto bad_paths; - xx(argv, 1, "number of path parameters", PATH_PARM, path_parms); - xx(argv, 2, "path scrubbing interval", SCRUB, scrub_interval); - xx(argv, 4, "path selector parameters", PARM, ps_parms); - - parms = path_parms + ps_parms; - if (MIN_PARMS + paths * parms != argc) - goto bad_parms; - - mc = alloc_context(); - if (!mc) - goto bad_context; - - pst = dm_get_path_selector(argv[3]); - if (!pst) - goto bad_ps; - - if (pst->ctr(&mc->ps, 0, NULL, &ti->error)) - goto bad_ps_ctr; - - mc->scrub_interval = scrub_interval; - - /* Loop through all paths parsing their parameters */ - av = &argv[MIN_PARMS]; - for (a = MIN_PARMS; a < argc; a += parms, av += parms) { - void *path_c; - - path = alloc_path(); - if (!path) - goto bad_alloc_path; - - /* Add path to the list first, so that _free_context() - is able to free it on error */ - list_add_tail(&path->list, &mc->paths); - - xx(av, 1, "path reactivation interval", PARM, - path->reactivation_interval); - xx(av, 2, "maximum path failures", PARM, path->fail_max); - - if (dm_get_device(ti, av[0], 
ti->begin, ti->len, - dm_table_get_mode(ti->table), &path->dev)) - goto bad_dm_get_device; - - path_c = mc->ps.type->add_path(&mc->ps, path, - ps_parms, &av[3], &ti->error); - if (!path_c) - goto bad_ps_add; - - path->ps_private = path_c; - path->mc = mc; - reset_failures(path); - } - - ti->private = mc; - ti->error = NULL; - mc->ti = ti; - push(&_mc_jobs, &mc->list, &_job_lock); - - return 0; - -bad_parms: - ti->error = "dm-multipath: not enough arguments"; - return -EINVAL; - -bad_paths: - ti->error = "dm-multipath: not enough paths"; - return -EINVAL; - -bad_context: - ti->error = "dm-multipath: can't allocate multipath context"; - return -ENOMEM; - -bad_ps: - free_context(mc); - ti->error = "dm-multipath: invalid path selector"; - return -EINVAL; - -bad_ps_ctr: - free_context(mc); - ti->error = "dm-multipath: error path selector constructor"; - return -ENXIO; - -bad_alloc_path: - free_context(mc); - ti->error = "dm-multipath: can't allocate path context"; - return -ENOMEM; - -bad_dm_get_device: - free_context(mc); - ti->error = "dm-multipath: error getting device"; - return -ENXIO; - -bad_ps_add: - free_context(mc); - ti->error = "dm-multipath: error add path"; - return -ENXIO; -} -#undef xx - -static void wait_for_scrub_ios(struct multipath_c *mc) -{ - struct path *path; - - list_for_each_entry(path, &mc->paths, list) { + list_for_each_entry (path, &m->paths, list) { while (test_bit(SCRUB_IO, &path->flags)) schedule_timeout(HZ / 2); } } -static inline void remove_mc_job(struct multipath_c *mc) -{ - unsigned long flags; - struct multipath_c *mc_tmp; - spin_lock_irqsave(&_job_lock, flags); - list_for_each_entry(mc_tmp, &_mc_jobs, list) { - if (mc == mc_tmp) { - list_del(&mc->list); - break; - } - } - spin_unlock_irqrestore(&_job_lock, flags); -} +#endif -/* Destruct a multipath mapping */ -static void multipath_dtr(struct dm_target *ti) -{ - struct multipath_c *mc = (struct multipath_c *) ti->private; - wait_for_scrub_ios(mc); - remove_mc_job(mc); - free_context(mc); -} -static inline void map(struct multipath_io *io, struct path *path) -{ - io->path = path; - io->bh->b_rdev = path->dev->dev; - set_io_jiffies(path); -} -static int multipath_end_io(struct dm_target *ti, struct buffer_head *bh, - int rw, int error, union map_info *map_context) +#ifdef EVENT_STUFF +/* "Queue" an event on a table in order to process + dm_table_event() calls in task context */ +static inline void queue_table_event(struct multipath_io *io) { - int r = 0; - struct multipath_io *io = (struct multipath_io *) map_context->ptr; - struct multipath_c *mc = (struct multipath_c *) io->mc; - struct path_selector *ps = &mc->ps; - struct path *path = io->path; - struct path_selector_type *pst = ps->type; - ps_endio_fn ps_endio = pst->endio; - - if (error) { - if (atomic_dec_and_test(&path->fail)) - set_failed(io); - - path = pst->select_path(ps, io->bh, io->rw, &io->path_context); - if (path) { - /* Map the IO to this new path */ - map(io, path); - push(&mc->io_jobs, &io->list, &mc->lock); - dm_daemon_wake(&_kmultipathd); + struct multipath *m = (struct multipath *) io->m; - return 1; /* Handle later */ - } - } - - /* Call path selector end IO method if registered */ - if (ps_endio) - ps_endio(ps, io->bh, io->rw, error, &io->path_context); - - free_io(io); - - return r; + atomic_inc(&m->events); } -/* Suspend */ -static void multipath_suspend(struct dm_target *ti) +/* Work all table events thrown */ +static void do_table_events(void) { - struct multipath_c *mc = (struct multipath_c *) ti->private; - - 
atomic_set(&mc->suspended, 1); - wait_for_scrub_ios(mc); -} + unsigned long flags; + struct multipath *m; -/* Resume */ -static void multipath_resume(struct dm_target *ti) -{ - struct multipath_c *mc = (struct multipath_c *) ti->private; + /* FIXME: optimize this in case no events need to be thrown + (which is most of the time) */ + spin_lock_irqsave(&_mpath_lock, flags); + list_for_each_entry (m, &_mpaths, list) { - atomic_set(&mc->suspended, 0); - dm_daemon_wake(&_kmultipathd); + /* Throw all events queued */ + while (atomic_read(&m->events)) { + dm_table_event(m->ti->table); + atomic_dec(&m->events); + } + } + spin_unlock_irqrestore(&_mpath_lock, flags); } -/* Multipath mapping */ -static int multipath_map(struct dm_target *ti, struct buffer_head *bh, - int rw, union map_info *map_context) -{ - struct multipath_c *mc = (struct multipath_c *) ti->private; - struct path_selector *ps = &mc->ps; - struct multipath_io *io = alloc_io(); - struct path *path; - - /* Ask path selector for a path */ - path = ps->type->select_path(ps, bh, rw, &io->path_context); - if (!path) { /* No valid path found */ - free_io(io); - return -1; /* Error */ - } - io->mc = mc; - io->bh = bh; - io->rw = rw; +#endif - map(io, path); /* Map the IO to this path */ - map_context->ptr = (void *) io; /* Save for multipath_end_io() */ - return 1; /* Normal map */ -} +#ifdef STATUS_FOO -/* Multipath status */ -static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ int sz = 0; - struct multipath_c *mc = (struct multipath_c *) ti->private; + struct multipath *m = (struct multipath *) ti->private; struct path *path; - ps_status_fn ps_status = mc->ps.type->status; + ps_status_fn ps_status = m->ps.type->status; switch (type) { case STATUSTYPE_INFO: - list_for_each_entry(path, &mc->paths, list) { + list_for_each_entry (path, &m->paths, list) { sz += snprintf(result + sz, maxlen - sz, "%s ", dm_kdevname(to_kdev_t(path->dev->dev))); - if (_is_failed(path)) { + if (is_failed(path)) { struct timespec fail; - jiffies_to_timespec(jiffies - path->io_jiffies, &fail); - sz += snprintf(result + sz, maxlen - sz, "I(%lu/" ARG_FORMAT ")", (unsigned long) fail.tv_sec, path->reactivation_interval); + jiffies_to_timespec(jiffies - path->fail_time, &fail); + sz += snprintf(result + sz, maxlen - sz, "I(%lu/" ARG_FORMAT ")", + (unsigned long) fail.tv_sec, path->reactivation_interval); } else { sz += snprintf(result + sz, maxlen - sz, "O"); if (atomic_read(&path->fail_total)) - sz += snprintf(result + sz, maxlen - sz, "[" ARG_FORMAT "]", atomic_read(&path->fail_total)); + sz += snprintf(result + sz, maxlen - sz, "[" ARG_FORMAT "]", + atomic_read(&path->fail_total)); sz += snprintf(result + sz, maxlen - sz, " "); } @@ -821,12 +797,12 @@ break; case STATUSTYPE_TABLE: - list_for_each_entry(path, &mc->paths, list) { + list_for_each_entry (path, &m->paths, list) { sz += snprintf(result + sz, maxlen - sz, "%s " ARG_FORMAT ARG_FORMAT " ", dm_kdevname(to_kdev_t(path->dev->dev)), path->reactivation_interval, - path->fail_max); + path->fail_limit); if (ps_status) { ps_status(path->ps_private, type, @@ -842,96 +818,4 @@ } return 0; -} - -static struct target_type multipath_target = { - .name = "multipath", - .module = THIS_MODULE, - .ctr = multipath_ctr, - .dtr = multipath_dtr, - .map = multipath_map, - .end_io = multipath_end_io, - .suspend = multipath_suspend, - .resume = multipath_resume, - .status = multipath_status, -}; - -int __init dm_multipath_init(void) -{ - int r = -EINVAL; - - if (!ios) - ios = 
DEFAULT_IOS; - else if (ios < MIN_IOS || ios > MAX_IOS) - goto bad; - - r = -ENOMEM; - - /* Create multipath io slab */ - _multipath_cache = kmem_cache_create("dm multipath io", - sizeof(struct multipath_io), - 0, 0, NULL, NULL); - if (!_multipath_cache) - goto bad; - - /* Create multipath io mempool */ - _multipath_pool = mempool_create(ios, mempool_alloc_slab, - mempool_free_slab, - _multipath_cache); - if (!_multipath_pool) - goto bad_pool; - - r = dm_register_target(&multipath_target); - if (r < 0) { - DMERR("%s: register failed %d", multipath_target.name, r); - goto bad_target; - } - - r = dm_register_path_selectors(); - if (r && r != -EEXIST) - goto bad_ps; - - r = dm_daemon_start(&_kmultipathd, "kmultipathd", do_work); - if (!r) { - DMINFO("dm_multipath v0.2.0 (%d io contexts preallocated)", - ios); - return 0; - } - -bad_ps: - dm_unregister_target(&multipath_target); - -bad_target: - mempool_destroy(_multipath_pool); - -bad_pool: - kmem_cache_destroy(_multipath_cache); - -bad: - return r; -} - -void __exit dm_multipath_exit(void) -{ - int r; - - dm_daemon_stop(&_kmultipathd); - dm_unregister_path_selectors(); - r = dm_unregister_target(&multipath_target); - if (r < 0) - DMERR("%s: target unregister failed %d", - multipath_target.name, r); - - mempool_destroy(_multipath_pool); - kmem_cache_destroy(_multipath_cache); -} - -/* Module hooks */ -module_init(dm_multipath_init); -module_exit(dm_multipath_exit); - -MODULE_DESCRIPTION(DM_NAME " multipath target"); -MODULE_AUTHOR("Heinz Mauelshagen "); -MODULE_LICENSE("GPL"); -MODULE_PARM(ios, "i"); -MODULE_PARM_DESC(ios, "number of preallocated io contexts"); +#endif --- diff/drivers/md/dm-null-ps.c 2003-11-26 10:20:39.000000000 +0000 +++ source/drivers/md/dm-null-ps.c 2003-11-26 10:21:02.000000000 +0000 @@ -21,16 +21,14 @@ struct null_c { spinlock_t lock; - struct list_head paths; /* List of operational paths */ - struct list_head failed_paths; /* List of failed paths */ + struct list_head valid_paths; + struct list_head invalid_paths; }; -/* Path info */ -struct path_c { - struct list_head list; /* Linked list to null_c */ - - struct path *path; /* Opaque pointer to caller path info */ - struct null_c *nc; /* Back pointer path selector context */ +/* We keep the paths on linked lists */ +struct path_list { + struct list_head list; + struct path *path; }; /* Allocate null context */ @@ -39,57 +37,34 @@ struct null_c *nc = kmalloc(sizeof(*nc), GFP_KERNEL); if (nc) { - INIT_LIST_HEAD(&nc->paths); - INIT_LIST_HEAD(&nc->failed_paths); + INIT_LIST_HEAD(&nc->valid_paths); + INIT_LIST_HEAD(&nc->invalid_paths); nc->lock = SPIN_LOCK_UNLOCKED; } return nc; } -/* Allocate path context */ -static struct path_c *alloc_path_c(void) -{ - struct path_c *pc = kmalloc(sizeof(*pc), GFP_KERNEL); - - if (pc) - memset(pc, 0, sizeof(*pc)); - - return pc; -} - - /* Path selector constructor */ -static int null_ctr(struct path_selector *ps, - int argc, char **argv, char **error) +static int null_ctr(struct path_selector *ps) { struct null_c *nc; - if (argc) { - *error = "null path selector: No arguments allowed"; - return -EINVAL; - } - nc = alloc_null_c(); - if (!nc) { - *error = "null path selector: Error allocating context"; + if (!nc) return -ENOMEM; - } - ps->context = (void *) nc; + ps->context = nc; return 0; } static void free_paths(struct list_head *paths) { - struct list_head *elem, *tmp; + struct path_list *pl, *next; - list_for_each_safe(elem, tmp, paths) { - struct path_c *path = - list_entry(elem, struct path_c, list); - - list_del(elem); - 
kfree(path); + list_for_each_entry_safe (pl, next, paths, list) { + list_del(&pl->list); + kfree(pl); } } @@ -97,68 +72,94 @@ static void null_dtr(struct path_selector *ps) { struct null_c *nc = (struct null_c *) ps->context; - free_paths(&nc->paths); - free_paths(&nc->failed_paths); + free_paths(&nc->valid_paths); + free_paths(&nc->invalid_paths); kfree(nc); } /* Path add context */ -static void *null_add_path(struct path_selector *ps, struct path *path, - int argc, char **argv, char **error) +static int null_add_path(struct path_selector *ps, struct path *path, + int argc, char **argv, char **error) { struct null_c *nc = (struct null_c *) ps->context; - struct path_c *pc; + struct path_list *pl; if (argc) { - *error = "null path selector: No path arguments allowd"; - return NULL; + *error = "null path selector: No path arguments allowed"; + return -EINVAL; } - pc = alloc_path_c(); - if (!pc) { + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) { *error = "null path selector: Error allocating path context"; - return NULL; + return -ENOMEM; } - pc->path = path; - pc->nc = nc; + pl->path = path; spin_lock(&nc->lock); - list_add_tail(&pc->list, &nc->paths); + list_add_tail(&pl->list, &nc->valid_paths); spin_unlock(&nc->lock); - return (void *) pc; + return 0; } -/* Path set state (state = 0 : operational; state != 0 : failed */ -static void null_set_path_state(void *ps_private, unsigned long state) +/* + * Search a list for a particular path. + */ +static struct path_list *__find_path(struct list_head *head, struct path *p) +{ + struct path_list *pl; + + list_for_each_entry (pl, head, list) + if (pl->path == p) + return pl; + + return NULL; +} + +static void null_set_path_state(struct path_selector *ps, + struct path *p, int valid) { unsigned long flags; - struct path_c *path = (struct path_c *) ps_private; - struct null_c *nc = path->nc; + struct null_c *nc = (struct null_c *) ps->context; + struct path_list *pl; + /* + * This function will be called infrequently so we don't + * mind the expense of these searches. + */ spin_lock_irqsave(&nc->lock, flags); - list_move_tail(&path->list, state ? &nc->failed_paths : &nc->paths); + pl = __find_path(&nc->valid_paths, p); + if (!pl) + pl = __find_path(&nc->invalid_paths, p); + + if (!pl) + DMWARN("asked to change the state of an unknown path"); + + else + list_move_tail(&pl->list, valid ? + &nc->valid_paths : &nc->invalid_paths); + spin_unlock_irqrestore(&nc->lock, flags); } /* Path selector */ static struct path *null_select_path(struct path_selector *ps, struct bio *bio, - struct path_info *path_context) + union map_info *map_context) { unsigned long flags; struct null_c *nc = (struct null_c *) ps->context; - struct list_head *list = &nc->paths; - struct path_c *path = NULL; + struct list_head *list = &nc->valid_paths; + struct path_list *pl = NULL; spin_lock_irqsave(&nc->lock, flags); if (!list_empty(list)) - path = list_entry(list->next, struct path_c, list); + pl = list_entry(list->next, struct path_list, list); spin_unlock_irqrestore(&nc->lock, flags); - /* Return opaque pointer to caller path object or NULL */ - return path ? path->path : NULL; + return pl ? 
pl->path : NULL; } static struct path_selector_type null_ps = { --- diff/drivers/md/dm-path-selector.h 2003-11-26 10:20:39.000000000 +0000 +++ source/drivers/md/dm-path-selector.h 2003-11-26 10:21:02.000000000 +0000 @@ -12,12 +12,14 @@ #ifndef DM_PATH_SELECTOR_H #define DM_PATH_SELECTOR_H +#include + struct path; -struct path_info { - void *ptr; - unsigned long long ll; -}; +/* + * We provide an abstraction for the code that chooses which path + * to send some io down. + */ struct path_selector_type; struct path_selector { struct path_selector_type *type; @@ -27,55 +29,53 @@ /* * Constructs a path selector object, takes custom arguments */ -typedef int (*ps_ctr_fn) (struct path_selector *ps, - int argc, char **argv, - char **error); -typedef void (*ps_dtr_fn) (struct path_selector *ps); +typedef int (*ps_ctr_fn) (struct path_selector *ps); +typedef void (*ps_dtr_fn) (struct path_selector *ps); /* * Add an opaque path object, along with some selector specific * path args (eg, path priority). */ -/* - * FIXME: what is this returning ? */ -typedef void * (*ps_add_path_fn) (struct path_selector *ps, - struct path *path, - int argc, char **argv, char **error); +typedef int (*ps_add_path_fn) (struct path_selector *ps, + struct path *path, + int argc, char **argv, char **error); /* * Chooses a path for this io, if no paths are available then - * NULL will be returned. Can take path_info over to ps_endio_fn below. + * NULL will be returned. The selector may set the map_info + * object if it wishes, this will be fed back into the endio fn. * * Must ensure that _any_ dynamically allocated selection context is * reused or reallocated because an endio call (which needs to free it) * might happen after a couple of select calls. */ -typedef struct path * (*ps_select_path_fn) (struct path_selector *ps, - struct bio *bio, - struct path_info *path_context); +typedef struct path *(*ps_select_path_fn) (struct path_selector *ps, + struct bio *bio, + union map_info *map_context); /* * Hook the end of the io, path throughput/failure can be - * detected through this. Must ensure, that any dynamically allocted + * detected through this. Must ensure, that any dynamically allocated * IO context gets freed. */ -typedef void (*ps_endio_fn) (struct path_selector *ps, - struct bio *bio, int error, - struct path_info *path_context); +typedef void (*ps_endio_fn) (struct path_selector *ps, + struct bio *bio, int error, + union map_info *map_context); /* - * Set path state (eg, failed/operational) + * Notify the selector that a path has failed. */ -typedef void (*ps_set_path_state_fn) (void *context, - unsigned long state); +typedef void (*ps_set_path_state_fn) (struct path_selector *ps, + struct path *p, int valid); /* * Table content based on parameters added in ps_add_path_fn * or path selector status */ -typedef int (*ps_status_fn) (void *context, - status_type_t type, - char *result, unsigned int maxlen); +typedef int (*ps_status_fn) (struct path_selector *ps, + struct path *path, + status_type_t type, + char *result, unsigned int maxlen); /* Information about a path selector type */ struct path_selector_type { @@ -92,6 +92,10 @@ ps_status_fn status; }; +/* + * FIXME: Factor out registration code. 
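+ * (the register/unregister pattern below is the same one dm already
+ * uses for targets; see dm_register_target() in dm-mpath.c above.)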
+ */ + /* Register a path selector */ int dm_register_path_selector(struct path_selector_type *type); --- diff/drivers/md/dm-raid1.c 2003-11-26 10:20:02.000000000 +0000 +++ source/drivers/md/dm-raid1.c 2003-11-26 10:21:02.000000000 +0000 @@ -893,7 +893,7 @@ blk_run_queues(); } -static void do_work(void) +static jiffy_t do_work(void) { struct mirror_set *ms; @@ -901,6 +901,8 @@ list_for_each_entry (ms, &_mirror_sets, list) do_mirror(ms); up_read(&_mirror_sets_lock); + + return 0; } /*-----------------------------------------------------------------
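
For illustration, here is a sketch of what a third path selector against the
interface in dm-path-selector.h could look like -- a plain round-robin that
rotates through its valid paths on every select.  None of this is part of the
patch: the "round-robin" type and the rr_* names are hypothetical, the slab.h
include is a guess (the patch's own include names were stripped in transit),
and the path_selector_type member names other than status are assumed to
mirror the ps_*_fn typedefs.  Registration would go through
dm_register_path_selector(&rr_ps) from the module init hook.

/* dm-rr-ps.c -- hypothetical sketch, NOT part of the patch above */
#include "dm-path-selector.h"
#include <linux/slab.h>	/* assumed; the patch's include list was stripped */

struct path_list {
	struct list_head list;
	struct path *path;
};

struct rr_c {
	spinlock_t lock;
	struct list_head valid_paths;	/* head is the next path to use */
	struct list_head invalid_paths;
};

static int rr_ctr(struct path_selector *ps)
{
	struct rr_c *rc = kmalloc(sizeof(*rc), GFP_KERNEL);

	if (!rc)
		return -ENOMEM;

	rc->lock = SPIN_LOCK_UNLOCKED;
	INIT_LIST_HEAD(&rc->valid_paths);
	INIT_LIST_HEAD(&rc->invalid_paths);
	ps->context = rc;
	return 0;
}

static void rr_dtr(struct path_selector *ps)
{
	struct rr_c *rc = (struct rr_c *) ps->context;
	struct path_list *pl, *next;

	/* both lists hold only our own path_list wrappers */
	list_for_each_entry_safe (pl, next, &rc->valid_paths, list)
		kfree(pl);
	list_for_each_entry_safe (pl, next, &rc->invalid_paths, list)
		kfree(pl);
	kfree(rc);
}

/* No per-path arguments; every path starts out valid. */
static int rr_add_path(struct path_selector *ps, struct path *path,
		       int argc, char **argv, char **error)
{
	struct rr_c *rc = (struct rr_c *) ps->context;
	struct path_list *pl;

	if (argc) {
		*error = "round-robin path selector: no arguments allowed";
		return -EINVAL;
	}

	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
	if (!pl)
		return -ENOMEM;

	pl->path = path;
	spin_lock(&rc->lock);
	list_add_tail(&pl->list, &rc->valid_paths);
	spin_unlock(&rc->lock);
	return 0;
}

static struct path *rr_select_path(struct path_selector *ps, struct bio *bio,
				   union map_info *map_context)
{
	unsigned long flags;
	struct rr_c *rc = (struct rr_c *) ps->context;
	struct path_list *pl = NULL;

	/* Take the head of the valid list and rotate it to the tail. */
	spin_lock_irqsave(&rc->lock, flags);
	if (!list_empty(&rc->valid_paths)) {
		pl = list_entry(rc->valid_paths.next, struct path_list, list);
		list_move_tail(&pl->list, &rc->valid_paths);
	}
	spin_unlock_irqrestore(&rc->lock, flags);

	return pl ? pl->path : NULL;
}

/*
 * Member names are assumed to mirror the ps_*_fn typedefs; the header
 * hunk above only shows the status member explicitly.  A real selector
 * must also supply set_path_state (it would be identical to
 * null_set_path_state() above, since multipath's set_failed() calls it
 * unconditionally); endio and status may stay NULL, which
 * multipath_end_io() checks for.
 */
static struct path_selector_type rr_ps = {
	.name = "round-robin",
	.ctr = rr_ctr,
	.dtr = rr_dtr,
	.add_path = rr_add_path,
	.select_path = rr_select_path,
};

The list_move_tail() rotation keeps selection O(1) under the same spinlock
discipline as dm-null-ps.c, which is why round-robin makes a cheap baseline
next to the latency selector.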