mirror of
https://github.com/mii443/qemu.git
synced 2025-12-09 14:08:32 +00:00
tcg: remove tb_lock
Use mmap_lock in user-mode to protect TCG state and the page descriptors.
In !user-mode, each vCPU has its own TCG state, so no locks needed.
Per-page locks are used to protect the page descriptors.
Per-TB locks are used in both modes to protect TB jumps.
Some notes:
- tb_lock is removed from notdirty_mem_write by passing a
locked page_collection to tb_invalidate_phys_page_fast.
- tcg_tb_lookup/remove/insert/etc have their own internal lock(s),
so there is no need to further serialize access to them.
- do_tb_flush is run in a safe async context, meaning no other
vCPU threads are running. Therefore acquiring mmap_lock there
is just to please tools such as thread sanitizer.
- Not visible in the diff, but tb_invalidate_phys_page already
has an assert_memory_lock.
- cpu_io_recompile is !user-only, so no mmap_lock there.
- Added mmap_unlock()'s before all siglongjmp's that could
be called in user-mode while mmap_lock is held.
+ Added an assert for !have_mmap_lock() after returning from
the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic.
Performance numbers before/after:
Host: AMD Opteron(tm) Processor 6376
ubuntu 17.04 ppc64 bootup+shutdown time
700 +-+--+----+------+------------+-----------+------------*--+-+
| + + + + + *B |
| before ***B*** ** * |
|tb lock removal ###D### *** |
600 +-+ *** +-+
| ** # |
| *B* #D |
| *** * ## |
500 +-+ *** ### +-+
| * *** ### |
| *B* # ## |
| ** * #D# |
400 +-+ ** ## +-+
| ** ### |
| ** ## |
| ** # ## |
300 +-+ * B* #D# +-+
| B *** ### |
| * ** #### |
| * *** ### |
200 +-+ B *B #D# +-+
| #B* * ## # |
| #* ## |
| + D##D# + + + + |
100 +-+--+----+------+------------+-----------+------------+--+-+
1 8 16 Guest CPUs 48 64
png: https://imgur.com/HwmBHXe
debian jessie aarch64 bootup+shutdown time
90 +-+--+-----+-----+------------+------------+------------+--+-+
| + + + + + + |
| before ***B*** B |
80 +tb lock removal ###D### **D +-+
| **### |
| **## |
70 +-+ ** # +-+
| ** ## |
| ** # |
60 +-+ *B ## +-+
| ** ## |
| *** #D |
50 +-+ *** ## +-+
| * ** ### |
| **B* ### |
40 +-+ **** # ## +-+
| **** #D# |
| ***B** ### |
30 +-+ B***B** #### +-+
| B * * # ### |
| B ###D# |
20 +-+ D ##D## +-+
| D# |
| + + + + + + |
10 +-+--+-----+-----+------------+------------+------------+--+-+
1 8 16 Guest CPUs 48 64
png: https://imgur.com/iGpGFtv
The gains are high for 4-8 CPUs. Beyond that point, however, unrelated
lock contention significantly hurts scalability.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
committed by
Richard Henderson
parent
705ad1ff0c
commit
0ac20318ce
@@ -49,6 +49,8 @@ void mtree_print_dispatch(fprintf_function mon, void *f,
|
||||
struct AddressSpaceDispatch *d,
|
||||
MemoryRegion *root);
|
||||
|
||||
struct page_collection;
|
||||
|
||||
/* Opaque struct for passing info from memory_notdirty_write_prepare()
|
||||
* to memory_notdirty_write_complete(). Callers should treat all fields
|
||||
* as private, with the exception of @active.
|
||||
@@ -60,10 +62,10 @@ void mtree_print_dispatch(fprintf_function mon, void *f,
|
||||
*/
|
||||
typedef struct {
|
||||
CPUState *cpu;
|
||||
struct page_collection *pages;
|
||||
ram_addr_t ram_addr;
|
||||
vaddr mem_vaddr;
|
||||
unsigned size;
|
||||
bool locked;
|
||||
bool active;
|
||||
} NotDirtyInfo;
|
||||
|
||||
@@ -91,7 +93,7 @@ typedef struct {
|
||||
*
|
||||
* This must only be called if we are using TCG; it will assert otherwise.
|
||||
*
|
||||
* We may take a lock in the prepare call, so callers must ensure that
|
||||
* We may take locks in the prepare call, so callers must ensure that
|
||||
* they don't exit (via longjump or otherwise) without calling complete.
|
||||
*
|
||||
* This call must only be made inside an RCU critical section.
|
||||
|
||||
Reference in New Issue
Block a user