diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index b11c07478dde..604ab5620694 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -149,6 +149,7 @@ typedef struct ztest_shared_hdr { uint64_t zh_stats_count; uint64_t zh_ds_size; uint64_t zh_ds_count; + uint64_t zh_scratch_state_size; } ztest_shared_hdr_t; static ztest_shared_hdr_t *ztest_shared_hdr; @@ -276,6 +277,12 @@ typedef struct ztest_shared_ds { static ztest_shared_ds_t *ztest_shared_ds; #define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) +typedef struct ztest_scratch_state { + uint64_t zs_raidz_scratch_verify_pause; +} ztest_shared_scratch_state_t; + +static ztest_shared_scratch_state_t *ztest_scratch_state; + #define BT_MAGIC 0x123456789abcdefULL #define MAXFAULTS(zs) \ (MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raid_parity + 1) - 1) @@ -371,17 +378,11 @@ typedef struct ztest_ds { */ typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); -/* - * XXX: remove zi_raidz_attach_compatible field, when - * raidz expansion will be completely integrated. 
- */ - typedef struct ztest_info { ztest_func_t *zi_func; /* test function */ uint64_t zi_iters; /* iterations per execution */ uint64_t *zi_interval; /* execute every seconds */ const char *zi_funcname; /* name of test function */ - boolean_t zi_raidz_attach_compatible; } ztest_info_t; typedef struct ztest_shared_callstate { @@ -438,84 +439,81 @@ uint64_t zopt_often = 1ULL * NANOSEC; /* every second */ uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ -#define ZTI_INIT(func, iters, interval, compatible) \ +#define ZTI_INIT(func, iters, interval) \ { .zi_func = (func), \ .zi_iters = (iters), \ .zi_interval = (interval), \ - .zi_raidz_attach_compatible = (compatible), \ .zi_funcname = # func } ztest_info_t ztest_info[] = { - ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always, B_TRUE), - ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always, B_TRUE), - ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always, B_TRUE), - ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes, B_TRUE), - ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always, B_FALSE), - ZTI_INIT(ztest_zap, 30, &zopt_always, B_FALSE), - ZTI_INIT(ztest_zap_parallel, 100, &zopt_always, B_FALSE), - ZTI_INIT(ztest_split_pool, 1, &zopt_always, B_FALSE), - ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant, B_FALSE), - ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often, B_FALSE), - ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often, B_FALSE), - ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often, B_FALSE), - ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes, B_FALSE), + ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always), + ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always), + ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always), + ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always), + ZTI_INIT(ztest_zap, 30, 
&zopt_always), + ZTI_INIT(ztest_zap_parallel, 100, &zopt_always), + ZTI_INIT(ztest_split_pool, 1, &zopt_always), + ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant), + ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often), + ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often), + ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often), + ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes), #if 0 - ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes, B_FALSE), + ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes), #endif - ZTI_INIT(ztest_fzap, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes, - B_FALSE), - ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_reguid, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_scrub, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_spa_upgrade, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_vdev_raidz_attach, 1, &zopt_sometimes, B_TRUE), - ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime, B_FALSE), - ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime, B_FALSE), - ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime, - B_FALSE), - ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_initialize, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_trim, 1, &zopt_sometimes, B_FALSE), - ZTI_INIT(ztest_fletcher, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely, B_FALSE), - ZTI_INIT(ztest_verify_dnode_bt, 
1, &zopt_sometimes, B_FALSE), + ZTI_INIT(ztest_fzap, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes), + ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes), + ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes), + ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes), + ZTI_INIT(ztest_reguid, 1, &zopt_rarely), + ZTI_INIT(ztest_scrub, 1, &zopt_rarely), + ZTI_INIT(ztest_spa_upgrade, 1, &zopt_rarely), + ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely), + ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes), + ZTI_INIT(ztest_vdev_raidz_attach, 1, &zopt_sometimes), + ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely), + ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime), + ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime), + ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime), + ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes), + ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely), + ZTI_INIT(ztest_initialize, 1, &zopt_sometimes), + ZTI_INIT(ztest_trim, 1, &zopt_sometimes), + ZTI_INIT(ztest_fletcher, 1, &zopt_rarely), + ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely), + ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes), }; #define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) ztest_info_t raidz_expand_info[] = { /* XXX - does this list of activities need further pruning? 
*/ - ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always, B_TRUE), - ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always, B_TRUE), - ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always, B_TRUE), - ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes, B_TRUE), - ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always, B_TRUE), - ZTI_INIT(ztest_zap, 30, &zopt_always, B_TRUE), - ZTI_INIT(ztest_zap_parallel, 100, &zopt_always, B_TRUE), - ZTI_INIT(ztest_split_pool, 1, &zopt_always, B_TRUE), - ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant, B_TRUE), - ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes, B_TRUE), - ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often, B_TRUE), - ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often, B_TRUE), - ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often, B_TRUE), - ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes, B_TRUE), + ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always), + ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always), + ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always), + ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always), + ZTI_INIT(ztest_zap, 30, &zopt_always), + ZTI_INIT(ztest_zap_parallel, 100, &zopt_always), + ZTI_INIT(ztest_split_pool, 1, &zopt_always), + ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant), + ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often), + ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often), + ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often), + ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes), #if 0 - ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes, B_TRUE), + ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes), #endif - ZTI_INIT(ztest_fzap, 1, &zopt_sometimes, B_TRUE), - ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely, B_TRUE), - ZTI_INIT(ztest_initialize, 1, &zopt_sometimes, B_TRUE), - ZTI_INIT(ztest_trim, 1, &zopt_sometimes, B_TRUE), - 
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes, B_TRUE), + ZTI_INIT(ztest_fzap, 1, &zopt_sometimes), + ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely), + ZTI_INIT(ztest_initialize, 1, &zopt_sometimes), + ZTI_INIT(ztest_trim, 1, &zopt_sometimes), + ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes), }; #define RAIDZ_EXPAND_FUNCS (sizeof (raidz_expand_info) / sizeof (ztest_info_t)) @@ -534,7 +532,6 @@ typedef struct ztest_cb_list { */ typedef struct ztest_shared { boolean_t zs_do_init; - boolean_t zs_do_raidz_scratch_verify; hrtime_t zs_proc_start; hrtime_t zs_proc_stop; hrtime_t zs_thread_start; @@ -547,7 +544,6 @@ typedef struct ztest_shared { uint64_t zs_space; uint64_t zs_splits; uint64_t zs_mirrors; - uint64_t zs_raidzs_attached; uint64_t zs_metaslab_sz; uint64_t zs_metaslab_df_alloc_threshold; uint64_t zs_guid; @@ -1253,9 +1249,28 @@ ztest_kill(ztest_shared_t *zs) * Before we kill off ztest, make sure that the config is updated. * See comment above spa_write_cachefile(). */ - mutex_enter(&spa_namespace_lock); - spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); - mutex_exit(&spa_namespace_lock); + if (raidz_expand_max_offset_pause) { + if (mutex_tryenter(&spa_namespace_lock)) { + spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); + mutex_exit(&spa_namespace_lock); + + ztest_scratch_state->zs_raidz_scratch_verify_pause = + raidz_expand_max_offset_pause; + } else { + /* + * Do not verify the scratch object if + * spa_namespace_lock cannot be acquired, + * as it can cause a deadlock in spa_config_update(). 
+ */ + raidz_expand_max_offset_pause = 0; + + return; + } + } else { + mutex_enter(&spa_namespace_lock); + spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); + mutex_exit(&spa_namespace_lock); + } (void) kill(getpid(), SIGKILL); } @@ -3113,15 +3128,31 @@ ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id) spa_config_exit(spa, SCL_CONFIG, FTAG); } +static int +ztest_get_raidz_children(spa_t *spa) +{ + vdev_t *raidvd; + + ASSERT(MUTEX_HELD(&ztest_vdev_lock)); + + if (ztest_opts.zo_raid_do_expand) { + raidvd = spa->spa_root_vdev->vdev_child[0]; + + ASSERT(raidvd->vdev_ops == &vdev_raidz_ops); + + return (raidvd->vdev_children); + } + + return (ztest_opts.zo_raid_children); +} + /* ARGSUSED */ void ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) { spa_t *spa; uint64_t initial_version = SPA_VERSION_INITIAL; - uint64_t raidz_children = ztest_opts.zo_raid_children + - ztest_shared->zs_raidzs_attached; - uint64_t version, newversion; + uint64_t raidz_children, version, newversion; nvlist_t *nvroot, *props; char *name; @@ -3140,6 +3171,8 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) */ (void) spa_destroy(name); + raidz_children = ztest_get_raidz_children(ztest_spa); + nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, NULL, raidz_children, ztest_opts.zo_mirrors, 1); @@ -3288,8 +3321,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) spa_t *spa = ztest_spa; uint64_t leaves; uint64_t guid; - uint64_t raidz_children = ztest_opts.zo_raid_children + - ztest_shared->zs_raidzs_attached; + uint64_t raidz_children; nvlist_t *nvroot; int error; @@ -3298,6 +3330,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) return; mutex_enter(&ztest_vdev_lock); + raidz_children = ztest_get_raidz_children(spa); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * raidz_children; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -3380,8 +3413,7 @@ ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id) spa_t *spa = ztest_spa; uint64_t leaves; nvlist_t *nvroot; - 
uint64_t raidz_children = ztest_opts.zo_raid_children + - ztest_shared->zs_raidzs_attached; + uint64_t raidz_children; const char *class = (ztest_random(2) == 0) ? VDEV_ALLOC_BIAS_SPECIAL : VDEV_ALLOC_BIAS_DEDUP; int error; @@ -3409,6 +3441,7 @@ ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id) return; } + raidz_children = ztest_get_raidz_children(spa); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * raidz_children; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -3676,8 +3709,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) uint64_t ashift = ztest_get_ashift(); uint64_t oldguid, pguid; uint64_t oldsize, newsize; - uint64_t raidz_children = ztest_opts.zo_raid_children + - ztest_shared->zs_raidzs_attached; + uint64_t raidz_children; char *oldpath, *newpath; int replacing; int oldvd_has_siblings = B_FALSE; @@ -3693,6 +3725,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); mutex_enter(&ztest_vdev_lock); + raidz_children = ztest_get_raidz_children(spa); leaves = MAX(zs->zs_mirrors, 1) * raidz_children; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); @@ -3708,6 +3741,14 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) goto out; } + /* + * Skipped with expandable raidz: bp corruptions are detected otherwise. + */ + if (ztest_opts.zo_raid_do_expand) { + spa_config_exit(spa, SCL_ALL, FTAG); + goto out; + } + /* * Decide whether to do an attach or a replace. 
*/ @@ -3905,25 +3946,48 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) umem_free(newpath, MAXPATHLEN); } -#define RAIDZ_REFLOW_OFFSET_PAUSE 4 - static void raidz_scratch_verify(void) { spa_t *spa; + uint64_t pause, offset; + raidz_reflow_scratch_state_t state; - if (ztest_shared->zs_do_raidz_scratch_verify == B_FALSE) + ASSERT(raidz_expand_max_offset_pause == 0); + + if (ztest_scratch_state->zs_raidz_scratch_verify_pause == 0) return; kernel_init(SPA_MODE_READ); + + mutex_enter(&spa_namespace_lock); + spa = spa_lookup(ztest_opts.zo_pool); + ASSERT(spa); + spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; + mutex_exit(&spa_namespace_lock); + VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); ASSERT3U(RRSS_GET_OFFSET(&spa->spa_uberblock), !=, UINT64_MAX); - ASSERT3U(RRSS_GET_OFFSET(&spa->spa_uberblock), >=, - RAIDZ_REFLOW_OFFSET_PAUSE); - ASSERT3U(RRSS_GET_STATE(&spa->spa_uberblock), ==, RRSS_SCRATCH_VALID); - ztest_shared->zs_do_raidz_scratch_verify = B_FALSE; + pause = ztest_scratch_state->zs_raidz_scratch_verify_pause; + offset = RRSS_GET_OFFSET(&spa->spa_uberblock); + state = RRSS_GET_STATE(&spa->spa_uberblock); + + if (pause < RAIDZ_EXPAND_PAUSE_SCRATCH_VALID) { + ASSERT3U(offset, ==, 0); + ASSERT3U(state, ==, RRSS_SCRATCH_NOT_IN_USE); + } else if (pause >= RAIDZ_EXPAND_PAUSE_SCRATCH_VALID && + pause <= RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED) { + ASSERT3U(offset, >=, pause); + ASSERT3U(state, ==, RRSS_SCRATCH_VALID); + } else { + ASSERT(pause <= RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE); + ASSERT3U(offset, >, pause); + ASSERT3U(state, ==, RRSS_SCRATCH_NOT_IN_USE); + } + + ztest_scratch_state->zs_raidz_scratch_verify_pause = 0; spa_close(spa, FTAG); kernel_fini(); @@ -3932,9 +3996,8 @@ raidz_scratch_verify(void) static void ztest_scratch_thread(void *arg) { - ztest_shared_t *zs = arg; for (int t = 100; t > 0; t -= 1) { - if (!zs->zs_do_raidz_scratch_verify) + if (raidz_expand_max_offset_pause == 0) thread_exit(); (void) poll(NULL, 0, 100); @@ -3950,8 +4013,9 @@ 
ztest_scratch_thread(void *arg) void ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id) { + ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; - uint64_t newsize, ashift = ztest_get_ashift(); + uint64_t leaves, raidz_children, newsize, ashift = ztest_get_ashift(); kthread_t *scratch_thread = NULL; vdev_t *newvd, *pvd; nvlist_t *root; @@ -3988,6 +4052,13 @@ ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id) newvd = pvd->vdev_child[ztest_random(pvd->vdev_children)]; newsize = 10 * vdev_get_min_asize(newvd) / (9 + ztest_random(2)); + /* + * Get next attached leaf id + */ + raidz_children = ztest_get_raidz_children(spa); + leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * raidz_children; + zs->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves; + if (spa->spa_raidz_expand) expected_error = ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS; @@ -3997,7 +4068,7 @@ ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id) * Path to vdev to be attached */ (void) snprintf(newpath, MAXPATHLEN, ztest_dev_template, - ztest_opts.zo_dir, ztest_opts.zo_pool, pvd->vdev_children); + ztest_opts.zo_dir, ztest_opts.zo_pool, zs->zs_vdev_next_leaf); /* * Build the nvlist describing newpath. 
@@ -4006,8 +4077,8 @@ ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id) 0, 0, 1); if (ztest_random(2) == 0 && expected_error == 0) { - raidz_expand_max_offset_pause = RAIDZ_REFLOW_OFFSET_PAUSE; - ztest_shared->zs_do_raidz_scratch_verify = B_TRUE; + raidz_expand_max_offset_pause = + ztest_random(RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE) + 1; scratch_thread = thread_create(NULL, 0, ztest_scratch_thread, ztest_shared, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri); } @@ -4021,21 +4092,18 @@ ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id) error == ZFS_ERR_DISCARDING_CHECKPOINT) expected_error = error; - if (error == 0) { - ztest_shared->zs_raidzs_attached++; - } else if (error != 0 && error != expected_error) { + if (error != 0 && error != expected_error) { fatal(0, "raidz attach (%s %"PRIu64") returned %d, expected %d", newpath, newsize, error, expected_error); } - if (ztest_shared->zs_do_raidz_scratch_verify) { + if (raidz_expand_max_offset_pause) { if (error != 0) { /* * Do not verify scratch object in case of error * returned by vdev attaching. */ raidz_expand_max_offset_pause = 0; - ztest_shared->zs_do_raidz_scratch_verify = B_FALSE; } VERIFY0(thread_join(scratch_thread)); @@ -6249,8 +6317,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) uint64_t leaves; uint64_t bad = 0x1990c0ffeedecadeull; uint64_t top, leaf; - uint64_t raidz_children = ztest_opts.zo_raid_children + - ztest_shared->zs_raidzs_attached; + uint64_t raidz_children; char *path0; char *pathrand; size_t fsize; @@ -6261,6 +6328,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) vdev_t *vd0 = NULL; uint64_t guid0 = 0; boolean_t islog = B_FALSE; + boolean_t injected = B_FALSE; path0 = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); pathrand = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); @@ -6273,15 +6341,23 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * strategy for damaging blocks does not take in to account evacuated * blocks which may have already been damaged. 
*/ - if (ztest_device_removal_active) { - mutex_exit(&ztest_vdev_lock); + if (ztest_device_removal_active) + goto out; + + /* + * The fault injection strategy for damaging blocks cannot be used + * if raidz expansion is in progress. The leaves value + * (attached raidz children) is variable and strategy for damaging + * blocks will corrupt same data blocks on different child vdevs + * because of reflow process. + */ + if (spa->spa_raidz_expand != NULL) goto out; - } maxfaults = MAXFAULTS(zs); + raidz_children = ztest_get_raidz_children(spa); leaves = MAX(zs->zs_mirrors, 1) * raidz_children; mirror_save = zs->zs_mirrors; - mutex_exit(&ztest_vdev_lock); ASSERT3U(leaves, >=, 1); @@ -6422,13 +6498,9 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * call vdev_[on|off]line without holding locks * to force unpredictable failures but the side * effects of vdev_[on|off]line prevent us from - * doing so. We grab the ztest_vdev_lock here to - * prevent a race between injection testing and - * aux_vdev removal. + * doing so. 
*/ - mutex_enter(&ztest_vdev_lock); (void) vdev_online(spa, guid0, 0, NULL); - mutex_exit(&ztest_vdev_lock); } } @@ -6502,9 +6574,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE) continue; - mutex_enter(&ztest_vdev_lock); if (mirror_save != zs->zs_mirrors) { - mutex_exit(&ztest_vdev_lock); (void) close(fd); goto out; } @@ -6514,15 +6584,25 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) "can't inject bad word at 0x%"PRIx64" in %s", offset, pathrand); - mutex_exit(&ztest_vdev_lock); - if (ztest_opts.zo_verbose >= 7) (void) printf("injected bad word into %s," " offset 0x%"PRIx64"\n", pathrand, offset); + + injected = B_TRUE; } (void) close(fd); out: + mutex_exit(&ztest_vdev_lock); + + if (injected && ztest_opts.zo_raid_do_expand) { + int error = spa_scan(spa, POOL_SCAN_SCRUB); + if (error == 0) { + while (dsl_scan_scrubbing(spa_get_dsl(spa))) + txg_wait_synced(spa_get_dsl(spa), 0); + } + } + umem_free(path0, MAXPATHLEN); umem_free(pathrand, MAXPATHLEN); } @@ -7300,12 +7380,8 @@ ztest_execute(int test, ztest_info_t *zi, uint64_t id) hrtime_t functime = gethrtime(); int i; - for (i = 0; i < zi->zi_iters; i++) { - if (!ztest_opts.zo_raid_do_expand) - zi->zi_func(zd, id); - else if (zi->zi_raidz_attach_compatible) - zi->zi_func(zd, id); - } + for (i = 0; i < zi->zi_iters; i++) + zi->zi_func(zd, id); functime = gethrtime() - functime; @@ -7364,7 +7440,7 @@ ztest_thread(void *arg) /* * See if it's time to force a crash. 
*/ - if (now > zs->zs_thread_kill) + if (now > zs->zs_thread_kill && !raidz_expand_max_offset_pause) ztest_kill(zs); /* @@ -7555,9 +7631,6 @@ ztest_freeze(void) spa_t *spa; int numloops = 0; - if (ztest_opts.zo_raid_do_expand) - return; - if (ztest_opts.zo_verbose >= 3) (void) printf("testing spa_freeze()...\n"); @@ -8429,6 +8502,7 @@ shared_data_size(ztest_shared_hdr_t *hdr) size += hdr->zh_size; size += hdr->zh_stats_size * hdr->zh_stats_count; size += hdr->zh_ds_size * hdr->zh_ds_count; + size += hdr->zh_scratch_state_size; return (size); } @@ -8452,6 +8526,7 @@ setup_hdr(void) hdr->zh_stats_count = ZTEST_FUNCS; hdr->zh_ds_size = sizeof (ztest_shared_ds_t); hdr->zh_ds_count = ztest_opts.zo_datasets; + hdr->zh_scratch_state_size = sizeof (ztest_shared_scratch_state_t); size = shared_data_size(hdr); VERIFY0(ftruncate(ztest_fd_data, size)); @@ -8486,6 +8561,8 @@ setup_data(void) ztest_shared_callstate = (void *)&buf[offset]; offset += hdr->zh_stats_size * hdr->zh_stats_count; ztest_shared_ds = (void *)&buf[offset]; + offset += hdr->zh_ds_size * hdr->zh_ds_count; + ztest_scratch_state = (void *)&buf[offset]; } static boolean_t diff --git a/include/sys/vdev_raidz.h b/include/sys/vdev_raidz.h index f784d425e9b3..ec52ff0827da 100644 --- a/include/sys/vdev_raidz.h +++ b/include/sys/vdev_raidz.h @@ -156,6 +156,13 @@ extern void vdev_raidz_attach_sync(void *, dmu_tx_t *); extern void spa_start_raidz_expansion_thread(spa_t *); extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *); extern int vdev_raidz_load(vdev_t *); + +/* Some of raidz scratch area states */ +#define RAIDZ_EXPAND_PAUSE_NONE 0 +#define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4 +#define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5 +#define RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE 11 + #ifdef __cplusplus } #endif diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 9dc74c7f328f..cd8c2c671245 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -3923,7 +3923,7 @@ 
raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) (long long)logical_size, (long long)spa->spa_ubsync.ub_timestamp); - raidz_expand_pause(spa, 4); + raidz_expand_pause(spa, RAIDZ_EXPAND_PAUSE_SCRATCH_VALID); /* * Overwrite with reflow'ed data. @@ -3954,7 +3954,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) abd_free(abds[i]); kmem_free(abds, raidvd->vdev_children * sizeof (abd_t *)); - raidz_expand_pause(spa, 5); + raidz_expand_pause(spa, RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED); /* * Update uberblock to indicate that the initial part has been @@ -4102,7 +4102,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa) spa_config_exit(spa, SCL_STATE, FTAG); - raidz_expand_pause(spa, 11); + raidz_expand_pause(spa, RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE); } /* ARGSUSED */