From 30158d885008246f48ee8ef9cdeca220c1bd8586 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 28 Mar 2024 16:02:51 +0200 Subject: [PATCH 1/2] migration: Set migration error in migration_completion() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 9425ef3f990a ("migration: Use migrate_has_error() in close_return_path_on_source()"), close_return_path_on_source() assumes that migration error is set if an error occurs during migration. This may not be true if migration errors in migration_completion(). For example, if qemu_savevm_state_complete_precopy() errors, migration error will not be set. This in turn, will cause a migration hang bug, similar to the bug that was fixed by commit 22b04245f0d5 ("migration: Join the return path thread before releasing to_dst_file"), as shutdown() will not be issued for the return-path channel. Fix it by ensuring migration error is set in case of error in migration_completion(). Signed-off-by: Avihai Horon Reviewed-by: Peter Xu Fixes: 9425ef3f990a ("migration: Use migrate_has_error() in close_return_path_on_source()") Acked-by: Cédric Le Goater Link: https://lore.kernel.org/r/20240328140252.16756-2-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 9fe8fd2afd..b73ae3a72c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2799,6 +2799,7 @@ static void migration_completion(MigrationState *s) { int ret = 0; int current_active_state = s->state; + Error *local_err = NULL; if (s->state == MIGRATION_STATUS_ACTIVE) { ret = migration_completion_precopy(s, ¤t_active_state); @@ -2832,6 +2833,15 @@ static void migration_completion(MigrationState *s) return; fail: + if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) { + migrate_set_error(s, local_err); + error_free(local_err); + } else if (ret) { + error_setg_errno(&local_err, -ret, "Error in migration completion"); + migrate_set_error(s, local_err); + error_free(local_err); + } + migration_completion_failed(s, current_active_state); } From d0ad271a7613459bd0a3397c8071a4ad06f3f7eb Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 28 Mar 2024 16:02:52 +0200 Subject: [PATCH 2/2] migration/postcopy: Ensure postcopy_start() sets errp if it fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several places where postcopy_start() fails without setting errp. This can cause a null pointer de-reference, as in case of error, the caller of postcopy_start() copies/prints the error set in errp. Fix it by setting errp in all of postcopy_start() error paths. Cc: qemu-stable Fixes: 908927db28ea ("migration: Update error description whenever migration fails") Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20240328140252.16756-3-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index b73ae3a72c..86bf76e925 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2510,6 +2510,8 @@ static int postcopy_start(MigrationState *ms, Error **errp) migration_wait_main_channel(ms); if (postcopy_preempt_establish_channel(ms)) { migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); + error_setg(errp, "%s: Failed to establish preempt channel", + __func__); return -1; } } @@ -2525,17 +2527,22 @@ static int postcopy_start(MigrationState *ms, Error **errp) ret = migration_stop_vm(ms, RUN_STATE_FINISH_MIGRATE); if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__); goto fail; } ret = migration_maybe_pause(ms, &cur_state, MIGRATION_STATUS_POSTCOPY_ACTIVE); if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()", + __func__); goto fail; } ret = bdrv_inactivate_all(); if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()", + __func__); goto fail; } restart_block = true; @@ -2612,6 +2619,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) /* Now send that blob */ if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { + error_setg(errp, "%s: Failed to send packaged data", __func__); goto fail_closefb; } qemu_fclose(fb);