migration: Refactor error handling in source return path

rp_state.error was a boolean used to show error happened in return path
thread.  That's not only duplicating error reporting (migrate_set_error),
but also not good enough in that we only do error_report() and set it to
true, we never can keep a history of the exact error and show it in
query-migrate.

To make this better, a few things done:

  - Use error_setg() rather than error_report() across the whole lifecycle
    of return path thread, keeping the error in an Error*.

  - With above, no need to have mark_source_rp_bad(), remove it, alongside
    with rp_state.error itself.

  - Use migrate_set_error() to apply that captured error to the global
    migration object when error occured in this thread.

  - Do the same when detected qemufile error in source return path

We need to re-export qemu_file_get_error_obj() to do the last one.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231017202633.296756-2-peterx@redhat.com>
This commit is contained in:
Peter Xu
2023-10-17 16:26:29 -04:00
committed by Juan Quintela
parent e7b428d6bc
commit 7aa6070d09
7 changed files with 80 additions and 95 deletions

View File

@ -1898,7 +1898,8 @@ static void migration_page_queue_free(RAMState *rs)
* @start: starting address from the start of the RAMBlock
* @len: length (in bytes) to send
*/
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len,
Error **errp)
{
RAMBlock *ramblock;
RAMState *rs = ram_state;
@ -1915,7 +1916,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
* Shouldn't happen, we can't reuse the last RAMBlock if
* it's the 1st request.
*/
error_report("ram_save_queue_pages no previous block");
error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no previous block");
return -1;
}
} else {
@ -1923,16 +1924,17 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
if (!ramblock) {
/* We shouldn't be asked for a non-existent RAMBlock */
error_report("ram_save_queue_pages no block '%s'", rbname);
error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no block '%s'", rbname);
return -1;
}
rs->last_req_rb = ramblock;
}
trace_ram_save_queue_pages(ramblock->idstr, start, len);
if (!offset_in_ramblock(ramblock, start + len - 1)) {
error_report("%s request overrun start=" RAM_ADDR_FMT " len="
RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
__func__, start, len, ramblock->used_length);
error_setg(errp, "MIG_RP_MSG_REQ_PAGES request overrun, "
"start=" RAM_ADDR_FMT " len="
RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
start, len, ramblock->used_length);
return -1;
}
@ -1964,9 +1966,9 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
assert(len % page_size == 0);
while (len) {
if (ram_save_host_page_urgent(pss)) {
error_report("%s: ram_save_host_page_urgent() failed: "
"ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
__func__, ramblock->idstr, start);
error_setg(errp, "ram_save_host_page_urgent() failed: "
"ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
ramblock->idstr, start);
ret = -1;
break;
}
@ -4110,7 +4112,7 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
* This is only used when the postcopy migration is paused but wants
* to resume from a middle point.
*/
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp)
{
int ret = -EINVAL;
/* from_dst_file is always valid because we're within rp_thread */
@ -4124,8 +4126,8 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
trace_ram_dirty_bitmap_reload_begin(block->idstr);
if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
error_report("%s: incorrect state %s", __func__,
MigrationStatus_str(s->state));
error_setg(errp, "Reload bitmap in incorrect state %s",
MigrationStatus_str(s->state));
return -EINVAL;
}
@ -4142,9 +4144,8 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
/* The size of the bitmap should match with our ramblock */
if (size != local_size) {
error_report("%s: ramblock '%s' bitmap size mismatch "
"(0x%"PRIx64" != 0x%"PRIx64")", __func__,
block->idstr, size, local_size);
error_setg(errp, "ramblock '%s' bitmap size mismatch (0x%"PRIx64
" != 0x%"PRIx64")", block->idstr, size, local_size);
return -EINVAL;
}
@ -4153,15 +4154,15 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
ret = qemu_file_get_error(file);
if (ret || size != local_size) {
error_report("%s: read bitmap failed for ramblock '%s': %d"
" (size 0x%"PRIx64", got: 0x%"PRIx64")",
__func__, block->idstr, ret, local_size, size);
error_setg(errp, "read bitmap failed for ramblock '%s': %d"
" (size 0x%"PRIx64", got: 0x%"PRIx64")",
block->idstr, ret, local_size, size);
return -EIO;
}
if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
__func__, block->idstr, end_mark);
error_setg(errp, "ramblock '%s' end mark incorrect: 0x%"PRIx64,
block->idstr, end_mark);
return -EINVAL;
}