QEMU用户态相关结构体和函数浅析

今天花了点时间粗看了下 qemu ,挺有意思的

概览

Qemu 通过 TCG翻译引擎(可以理解为编译器)将每条 target 指令动态翻译成 TCG 微操作码(前端),然后再转换成对应的 houst 的指令来实现仿真(后端)。Qemu 代码翻译流程:

1
target instruction ->micro-op->tcg->host instruction

qemu 仿真的大致流程图如下:
2012-12-30-3.png-111.6kB

代码结构

  • vl.c 模拟循环,虚拟机机器环境初始化,和CPU的执行。
  • target-arch/translate.c 将guest代码转化成不同架构的TCG操作码。
  • tcg/tcg.c TCG操作码的实现
  • tcg-arch/tcg-target.c 将TCG操作码转化成对应的主机代码
  • cpu-exec.c 其中的cpu-exec()函数主要寻找下一个TB(翻译代码块),如果没找到就请求得到下一个TB,并且操作生成的代码块。

TCG 执行流程

前端实现

TB 为单位读入当前 pc 指向的 guest code
image_thumb1.png-28.1kB

后端实现

生成对应的 TCG 操作码并优化,最后转化为 host code
image_thumb2.png-36.3kB
image_thumb3.png-41.1kB

TB Cache

TB Cache 的应用使 translate code 可以进行重用,减少了 guest codeQEMU Translation Code 之间的切换, 提高了 QEMU 的性能。

相关数据结构

TCGContext

存放 TCG IR 的结构。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
struct TCGContext {
uint8_t *pool_cur, *pool_end;
TCGPool *pool_first, *pool_current, *pool_first_large;
int nb_labels;
int nb_globals;
int nb_temps;
int nb_indirects;
/* goto_tb support */
tcg_insn_unit *code_buf;
uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */
uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
TCGRegSet reserved_regs; //寄存器
intptr_t current_frame_offset;
intptr_t frame_start;
intptr_t frame_end;
TCGTemp *frame_temp;
tcg_insn_unit *code_ptr;
GHashTable *helpers;
#ifdef CONFIG_PROFILER
/* profiling info */
int64_t tb_count1;
int64_t tb_count;
int64_t op_count; /* total insn count */
int op_count_max; /* max insn per TB */
int64_t temp_count;
int temp_count_max;
int64_t del_op_count;
int64_t code_in_len;
int64_t code_out_len;
int64_t search_out_len;
int64_t interm_time;
int64_t code_time;
int64_t la_time;
int64_t opt_time;
int64_t restore_count;
int64_t restore_time;
#endif
#ifdef CONFIG_DEBUG_TCG
int temps_in_use;
int goto_tb_issue_mask;
#endif
int gen_next_op_idx;
int gen_next_parm_idx;
/* Code generation. Note that we specifically do not use tcg_insn_unit
here, because there's too much arithmetic throughout that relies
on addition and subtraction working on bytes. Rely on the GCC
extension that allows arithmetic on void*. */
int code_gen_max_blocks;
void *code_gen_prologue; //指向缓冲区的prologue
void *code_gen_buffer; //翻译后的指令缓冲区
size_t code_gen_buffer_size; //翻译后指令缓冲区的大小
void *code_gen_ptr; //指向翻译后的指令缓冲区
/* Threshold to flush the translated code buffer. */
void *code_gen_highwater;
TBContext tb_ctx;
/* Track which vCPU triggers events */
CPUState *cpu; /* *_trans cpu状态*/
TCGv_env tcg_env; /* *_exec */
/* The TCGBackendData structure is private to tcg-target.inc.c. */
struct TCGBackendData *be;
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
/* Tells which temporary holds a given register.
It does not take into account fixed registers */
TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS]; //临时寄存器变量
TCGOp gen_op_buf[OPC_BUF_SIZE]; //操作码缓冲区
TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; //参数缓冲区
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
};

DisasContext

应该可以理解为反编译 guest 需要的一些信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
typedef struct DisasContext {
/* current insn context */
int override; /* -1 if no override */
int prefix;
TCGMemOp aflag;
TCGMemOp dflag;
target_ulong pc_start;
target_ulong pc; /* pc = eip + cs_base */
int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
static state change (stop translation) */
/* current block context */
target_ulong cs_base; /* base of CS segment */
int pe; /* protected mode */
int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
int lma; /* long mode active */
int code64; /* 64 bit code segment */
int rex_x, rex_b;
#endif
int vex_l; /* vex vector length */
int vex_v; /* vex vvvv register, without 1's compliment. */
int ss32; /* 32 bit stack segment */
CCOp cc_op; /* current CC operation */
bool cc_op_dirty;
int addseg; /* non zero if either DS/ES/SS have a non zero base */
int f_st; /* currently unused */
int vm86; /* vm86 mode */
int cpl;
int iopl;
int tf; /* TF cpu flag */
int singlestep_enabled; /* "hardware" single step enabled */
int jmp_opt; /* use direct block chaining for direct jumps */
int repz_opt; /* optimize jumps within repz instructions */
int mem_index; /* select memory access functions */
uint64_t flags; /* all execution flags */
struct TranslationBlock *tb;
int popl_esp_hack; /* for correct popl with esp base handling */
int rip_offset; /* only used in x86_64, but left for simplicity */
int cpuid_features;
int cpuid_ext_features;
int cpuid_ext2_features;
int cpuid_ext3_features;
int cpuid_7_0_ebx_features;
int cpuid_xsave_features;
} DisasContext;

CPUState

cpu 相关信息的记录

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/**
* CPUState:
* @cpu_index: CPU index (informative).
* @nr_cores: Number of cores within this CPU package.
* @nr_threads: Number of threads within this CPU.
* @numa_node: NUMA node this CPU is belonging to.
* @host_tid: Host thread ID.
* @running: #true if CPU is currently running (usermode).
* @created: Indicates whether the CPU thread has been successfully created.
* @interrupt_request: Indicates a pending interrupt request.
* @halted: Nonzero if the CPU is in suspended state.
* @stop: Indicates a pending stop request.
* @stopped: Indicates the CPU has been artificially stopped.
* @unplug: Indicates a pending CPU unplug request.
* @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
* @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
* CPU and return to its top level loop.
* @tb_flushed: Indicates the translation buffer has been flushed.
* @singlestep_enabled: Flags for single-stepping.
* @icount_extra: Instructions until next timer event.
* @icount_decr: Number of cycles left, with interrupt flag in high bit.
* This allows a single read-compare-cbranch-write sequence to test
* for both decrementer underflow and exceptions.
* @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution
* requires that IO only be performed on the last instruction of a TB
* so that interrupts take effect immediately.
* @cpu_ases: Pointer to array of CPUAddressSpaces (which define the
* AddressSpaces this CPU has)
* @num_ases: number of CPUAddressSpaces in @cpu_ases
* @as: Pointer to the first AddressSpace, for the convenience of targets which
* only have a single AddressSpace
* @env_ptr: Pointer to subclass-specific CPUArchState field.
* @gdb_regs: Additional GDB registers.
* @gdb_num_regs: Number of total registers accessible to GDB.
* @gdb_num_g_regs: Number of registers in GDB 'g' packets.
* @next_cpu: Next CPU sharing TB cache.
* @opaque: User data.
* @mem_io_pc: Host Program Counter at which the memory was accessed.
* @mem_io_vaddr: Target virtual address at which the memory was accessed.
* @kvm_fd: vCPU file descriptor for KVM.
* @work_mutex: Lock to prevent multiple access to queued_work_*.
* @queued_work_first: First asynchronous work pending.
* @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
*
* State of one CPU core or thread.
*/
struct CPUState {
/*< private >*/
DeviceState parent_obj;
/*< public >*/
int nr_cores;
int nr_threads;
int numa_node;
struct QemuThread *thread;
#ifdef _WIN32
HANDLE hThread;
#endif
int thread_id;
uint32_t host_tid;
bool running;
struct QemuCond *halt_cond;
bool thread_kicked;
bool created;
bool stop;
bool stopped;
bool unplug;
bool crash_occurred;
bool exit_request;
bool tb_flushed;
uint32_t interrupt_request;
int singlestep_enabled;
int64_t icount_extra;
sigjmp_buf jmp_env;
QemuMutex work_mutex;
struct qemu_work_item *queued_work_first, *queued_work_last;
CPUAddressSpace *cpu_ases;
int num_ases;
AddressSpace *as;
MemoryRegion *memory;
void *env_ptr; /* CPUArchState */
struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
struct GDBRegisterState *gdb_regs;
int gdb_num_regs;
int gdb_num_g_regs;
QTAILQ_ENTRY(CPUState) node;
/* ice debug support */
QTAILQ_HEAD(breakpoints_head, CPUBreakpoint) breakpoints;
QTAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints;
CPUWatchpoint *watchpoint_hit;
void *opaque;
/* In order to avoid passing too many arguments to the MMIO helpers,
* we store some rarely used information in the CPU context.
*/
uintptr_t mem_io_pc;
vaddr mem_io_vaddr;
int kvm_fd;
bool kvm_vcpu_dirty;
struct KVMState *kvm_state;
struct kvm_run *kvm_run;
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate, TRACE_VCPU_EVENT_COUNT);
/* TODO Move common fields from CPUArchState here. */
int cpu_index; /* used by alpha TCG */
uint32_t halted; /* used by alpha, cris, ppc TCG */
union {
uint32_t u32;
icount_decr_u16 u16;
} icount_decr;
uint32_t can_do_io;
int32_t exception_index; /* used by m68k TCG */
/* Used to keep track of an outstanding cpu throttle thread for migration
* autoconverge
*/
bool throttle_thread_scheduled;
/* Note that this is accessed at the start of every TB via a negative
offset from AREG0. Leave this field at the end so as to make the
(absolute value) offset as small as possible. This reduces code
size, especially for hosts without large memory offsets. */
uint32_t tcg_exit_req;
};

image_info

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
struct image_info {
abi_ulong load_addr; //加载基址
abi_ulong start_code; //代码起始地址
abi_ulong end_code; //代码结束地址
abi_ulong start_data; //数据段起始地址
abi_ulong end_data; //数据段结束地址
abi_ulong start_brk; //用于堆分配的brk起始地址
abi_ulong brk; //结束地址?
abi_ulong start_mmap; //mmap起始地址
abi_ulong mmap; //结束?
abi_ulong rss;
abi_ulong start_stack; //栈起始地址
abi_ulong entry; //程序入口点
abi_ulong code_offset; //代码段偏移
abi_ulong data_offset; //数据段偏移
int personality;
};

TranslationBlock

注释挺全的 哈哈哈 自己看吧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
struct TranslationBlock {
target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
target_ulong cs_base; /* CS base for this block */
uint32_t flags; /* flags defining in which context the code was generated */
uint16_t size; /* size of target code for this block (1 <=
size <= TARGET_PAGE_SIZE) */
uint16_t icount;
uint32_t cflags; /* compile flags */
#define CF_COUNT_MASK 0x7fff
#define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */
#define CF_NOCACHE 0x10000 /* To be freed after execution */
#define CF_USE_ICOUNT 0x20000
#define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */
void *tc_ptr; /* pointer to the translated code */
uint8_t *tc_search; /* pointer to search data */
/* original tb when cflags has CF_NOCACHE */
struct TranslationBlock *orig_tb;
/* first and second physical page containing code. The lower bit
of the pointer tells the index in page_next[] */
struct TranslationBlock *page_next[2];
tb_page_addr_t page_addr[2];
/* The following data are used to directly call another TB from
* the code of this one. This can be done either by emitting direct or
* indirect native jump instructions. These jumps are reset so that the TB
* just continue its execution. The TB can be linked to another one by
* setting one of the jump targets (or patching the jump instruction). Only
* two of such jumps are supported.
*/
uint16_t jmp_reset_offset[2]; /* offset of original jump target */
#define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */
#ifdef USE_DIRECT_JUMP
uint16_t jmp_insn_offset[2]; /* offset of native jump instruction */
#else
uintptr_t jmp_target_addr[2]; /* target address for indirect jump */
#endif
/* Each TB has an assosiated circular list of TBs jumping to this one.
* jmp_list_first points to the first TB jumping to this one.
* jmp_list_next is used to point to the next TB in a list.
* Since each TB can have two jumps, it can participate in two lists.
* jmp_list_first and jmp_list_next are 4-byte aligned pointers to a
* TranslationBlock structure, but the two least significant bits of
* them are used to encode which data field of the pointed TB should
* be used to traverse the list further from that TB:
* 0 => jmp_list_next[0], 1 => jmp_list_next[1], 2 => jmp_list_first.
* In other words, 0/1 tells which jump is used in the pointed TB,
* and 2 means that this is a pointer back to the target TB of this list.
*/
uintptr_t jmp_list_next[2];
uintptr_t jmp_list_first;
};

部分源码分析

linux-user/main.c main

只是一部分源码,没细看,等考研结束再看吧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
int main(int argc, char **argv, char **envp)
{
CPUState *cpu;
// 初始化 TCG
tcg_exec_init(0);
//初始化cpu
cpu = cpu_init(cpu_model);
if (!cpu) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(EXIT_FAILURE);
}
//传递环境和命令行参数到 guest
target_environ = envlist_to_environ(envlist, NULL);
envlist_free(envlist);
// 初始化 TaskState 数据结构
init_task_state(ts);
/* build Task State */
ts->info = info; //被加载程序的一些信息(似乎镜像信息更合适)
ts->bprm = &bprm;
cpu->opaque = ts;
//加载指向guest, regs表示程序入口点
ret = loader_exec(execfd, filename, target_argv, target_environ, regs,
info, &bprm);
//设置guest的brk指针
target_set_brk(info->brk);
//初始化syscall和signal
syscall_init();
signal_init();
/*
根据相应guest 设置cpu
*/
#if defined(TARGET_I386)
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
env->hflags |= HF_PE_MASK | HF_CPL_MASK;
if (env->features[FEAT_1_EDX] & CPUID_SSE) {
env->cr[4] |= CR4_OSFXSR_MASK;
env->hflags |= HF_OSFXSR_MASK;
}
#ifndef TARGET_ABI32
/* enable 64 bit mode if possible */
if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
exit(EXIT_FAILURE);
}
env->cr[4] |= CR4_PAE_MASK;
env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
env->hflags |= HF_LMA_MASK;
#endif
/*
根据给的guest来设置相应的寄存器(64位是不是寄存器有点少啊)
*/
/* linux register setup */
#ifndef TARGET_ABI32
env->regs[R_EAX] = regs->rax;
env->regs[R_EBX] = regs->rbx;
env->regs[R_ECX] = regs->rcx;
env->regs[R_EDX] = regs->rdx;
env->regs[R_ESI] = regs->rsi;
env->regs[R_EDI] = regs->rdi;
env->regs[R_EBP] = regs->rbp;
env->regs[R_ESP] = regs->rsp;
env->eip = regs->rip;
#else
env->regs[R_EAX] = regs->eax;
env->regs[R_EBX] = regs->ebx;
env->regs[R_ECX] = regs->ecx;
env->regs[R_EDX] = regs->edx;
env->regs[R_ESI] = regs->esi;
env->regs[R_EDI] = regs->edi;
env->regs[R_EBP] = regs->ebp;
env->regs[R_ESP] = regs->esp;
env->eip = regs->eip;
#endif
下面是设置中断和段,还有其他架构的信息,不贴啦
/*
主要是cpu_loop这个函数,分析在下面
*/
cpu_loop(env);
/* never exits */
return 0;
}

linux-user/main.c tcg_exec_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Must be called before using the QEMU cpus. 'tb_size' is the size
(in bytes) allocated to the translation buffer. Zero means default
size. */
void tcg_exec_init(unsigned long tb_size)
{
/*
cpu_gen_init()调用tcg_context_init()对tcg_ctx进行初始化,在这个函数好像只设置了nb_globals和helpers,之后调用tcg_target_init设置寄存器?
*/
cpu_gen_init();
page_init();
tb_htable_init();
/*
code_gen_alloc为translate block分配缓冲区
*/
code_gen_alloc(tb_size);
#if defined(CONFIG_SOFTMMU)
/* There's no guest base to take into account, so go ahead and
initialize the prologue now. */
//初始化prologue
tcg_prologue_init(&tcg_ctx);
#endif
}

linux-user/main.c cpu_loop

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
void cpu_loop(CPUX86State *env)
{
CPUState *cs = CPU(x86_env_get_cpu(env));
int trapnr;
abi_ulong pc;
abi_ulong ret;
target_siginfo_t info;
for(;;) {
cpu_exec_start(cs); //加锁,表示cpu正在被占用
/*
调用cpu_x86_exec函数进行翻译,并根据相应的返回值做相应处理,如中断和系统调用
*/
trapnr = cpu_exec(cs);
cpu_exec_end(cs); //释放锁
switch(trapnr) {
//32位系统调用
case 0x80:
/* linux syscall from int $0x80 */
ret = do_syscall(env,
env->regs[R_EAX],
env->regs[R_EBX],
env->regs[R_ECX],
env->regs[R_EDX],
env->regs[R_ESI],
env->regs[R_EDI],
env->regs[R_EBP],
0, 0);
if (ret == -TARGET_ERESTARTSYS) {
env->eip -= 2;
} else if (ret != -TARGET_QEMU_ESIGRETURN) {
env->regs[R_EAX] = ret;
}
break;
#ifndef TARGET_ABI32
//64位系统调用
case EXCP_SYSCALL:
/* linux syscall from syscall instruction */
ret = do_syscall(env,
env->regs[R_EAX],
env->regs[R_EDI],
env->regs[R_ESI],
env->regs[R_EDX],
env->regs[10],
env->regs[8],
env->regs[9],
0, 0);
if (ret == -TARGET_ERESTARTSYS) {
env->eip -= 2;
} else if (ret != -TARGET_QEMU_ESIGRETURN) {
//结果存到eax
env->regs[R_EAX] = ret;
}
break;
#endif
case EXCP0B_NOSEG:
.....
default:
pc = env->segs[R_CS].base + env->eip;
EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
(long)pc, trapnr);
abort();
}
process_pending_signals(env);
}
}

cpu-exec.c cpu_exec

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/* main execution loop */
int cpu_exec(CPUState *cpu)
{
//CPUClass类似于c++的虚表,存放了一组函数指针
CPUClass *cc = CPU_GET_CLASS(cpu);
int ret;
SyncClocks sc;
//保存当前cpu状态
/* replay_interrupt may need current_cpu */
current_cpu = cpu;
......
//对env->eflags进行初始化
cc->cpu_exec_enter(cpu);
/* Calculate difference between guest clock and host clock.
* This delay includes the delay of the last cycle, so
* what we have to do is sleep until it is 0. As for the
* advance/delay we gain here, we try to fix it next time.
*/
//好像和时钟周期有关?
init_delay_params(&sc, cpu);
for(;;) {
/* prepare setjmp context for exception handling */
//正常情况下会进入if,不大懂可以查下sigsetjmp
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
TranslationBlock *tb, *last_tb = NULL;
int tb_exit = 0;
//如果有异常发生,并且不能处理则跳出
/* if an exception is pending, we execute it here */
if (cpu_handle_exception(cpu, &ret)) {
break;
}
cpu->tb_flushed = false; /* reset before first TB lookup */
for(;;) {
//处理中断
cpu_handle_interrupt(cpu, &last_tb);
//负责找到下一个需要执行的tb
tb = tb_find_fast(cpu, &last_tb, tb_exit);
//执行找到的tb
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
} /* for(;;) */
} else {
#if defined(__clang__) || !QEMU_GNUC_PREREQ(4, 6)
/* Some compilers wrongly smash all local variables after
* siglongjmp. There were bug reports for gcc 4.5.0 and clang.
* Reload essential local variables here for those compilers.
* Newer versions of gcc would complain about this code (-Wclobbered). */
//说明是通过siglongjmp到达的,恢复cpu之前保存的信息
cpu = current_cpu;
cc = CPU_GET_CLASS(cpu);
#else /* buggy compiler */
/* Assert that the compiler does not smash local variables. */
g_assert(cpu == current_cpu);
g_assert(cc == CPU_GET_CLASS(cpu));
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
}
} /* for(;;) */
cc->cpu_exec_exit(cpu);
rcu_read_unlock();
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_cpu, NULL);
return ret;
}

cpu-exec.c tb_find_fast

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
static inline TranslationBlock *tb_find_fast(CPUState *cpu,
TranslationBlock **last_tb,
int tb_exit)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
/* we record a subset of the CPU state. It will
always be the same before a given translated block
is executed. */
//获得当前cpu相关信息 pc, cs_base, flags
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
//tb加锁
tb_lock();
//尝试以TB的pc为参数寻找已经翻译过的TB
tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags)) {
/*没有找到则调用tb_find_slow 在tb_find_slow函数中会
以 pc(cs + eip)对应的物理地址寻找TB。如果找到则将TB写入tb_jmp_cache; 若否,則进行翻译。
*/
tb = tb_find_slow(cpu, pc, cs_base, flags);
}
if (cpu->tb_flushed) {
/* Ensure that no TB jump will be modified as the
* translation buffer has been flushed.
*/
*last_tb = NULL;
cpu->tb_flushed = false;
}
#ifndef CONFIG_USER_ONLY
/* We don't take care of direct jumps when address mapping changes in
* system emulation. So it's not safe to make a direct jump to a TB
* spanning two pages because the mapping for the second page can change.
*/
if (tb->page_addr[1] != -1) {
*last_tb = NULL;
}
#endif
/* See if we can patch the calling TB. */
if (*last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
//tb_add_jump(*last_tb, tb_exit, tb);
}
tb_unlock();
return tb;
}

cpu-exec.c tb_find_slow

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
static TranslationBlock *tb_find_slow(CPUState *cpu,
target_ulong pc,
target_ulong cs_base,
uint32_t flags)
{
TranslationBlock *tb;
tb = tb_find_physical(cpu, pc, cs_base, flags);
if (tb) {
//如果在对应物理地址中找到TB则跳转到found
goto found;
}
#ifdef CONFIG_USER_ONLY
/* mmap_lock is needed by tb_gen_code, and mmap_lock must be
* taken outside tb_lock. Since we're momentarily dropping
* tb_lock, there's a chance that our desired tb has been
* translated.
*/
tb_unlock();
mmap_lock();
tb_lock();
tb = tb_find_physical(cpu, pc, cs_base, flags);
if (tb) {
mmap_unlock();
goto found;
}
#endif
//没有找到则调用tb_gen_code进行翻译
/* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
#ifdef CONFIG_USER_ONLY
mmap_unlock();
#endif
found:
//将TB写入tb_jmp_cache
/* we add the TB in the virtual pc hash table */
cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
return tb;
}

translate-all.c tb_gen_code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
uint32_t flags, int cflags)
{
CPUArchState *env = cpu->env_ptr;
TranslationBlock *tb;
tb_page_addr_t phys_pc, phys_page2;
target_ulong virt_page2;
tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size;
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
/*
将虚拟地址转化为物理地址,用户模式下直接返回pc,系统模式下看不下去
*/
phys_pc = get_page_addr_code(env, pc);
if (use_icount && !(cflags & CF_IGNORE_ICOUNT)) {
cflags |= CF_USE_ICOUNT;
}
/*
分配TranslationBlock描述符,将要翻译的pc等信息记录下来
*/
tb = tb_alloc(pc);
if (unlikely(!tb)) {
buffer_overflow:
/* flush must be done */
/*
分配失败,可能满了,清空cache
*/
tb_flush(cpu);
/* cannot fail at this point */
tb = tb_alloc(pc);
assert(tb != NULL);
}
gen_code_buf = tcg_ctx.code_gen_ptr;
tb->tc_ptr = gen_code_buf;
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
.......
//初始化gen_opc_ptr 和 gen_opparam_ptr
tcg_func_start(&tcg_ctx);
tcg_ctx.cpu = ENV_GET_CPU(env);
//生成TCG操作码
gen_intermediate_code(env, tb);
tcg_ctx.cpu = NULL;
trace_translate_block(tb, tb->pc, tb->tc_ptr);
......
/* ??? Overflow could be handled better here. In particular, we
don't need to re-do gen_intermediate_code, nor should we re-do
the tcg optimization currently hidden inside tcg_gen_code. All
that should be required is to flush the TBs, allocate a new TB,
re-initialize it per above, and re-do the actual code generation. */
//将TCG操作码翻译成相应host的指令
gen_code_size = tcg_gen_code(&tcg_ctx, tb);
if (unlikely(gen_code_size < 0)) {
goto buffer_overflow;
}
......
return tb;
}

target-i386/translate.c gen_intermediate_code

这个函数看的好蒙啊,好多不懂的,简单来说就是调用了 disas_insn 函数把指令转化为 TCG 操作码

target-i386/translate.c disas_insn

因为对操作码的处理情况太多,所以只分析了一部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
case 2: /* call Ev */
/* XXX: optimize if memory (no 'and' is necessary) */
if (dflag == MO_16) {
tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
}
//next_eip
next_eip = s->pc - s->cs_base;
tcg_gen_movi_tl(cpu_T1, next_eip);
gen_push_v(s, cpu_T1);
gen_op_jmp_v(cpu_T0);
gen_bnd_jmp(s);
gen_eob(s);
break;
case 0xe8: /* call im */
{
if (dflag != MO_16) {
tval = (int32_t)insn_get(env, s, MO_32);
} else {
tval = (int16_t)insn_get(env, s, MO_16);
}
next_eip = s->pc - s->cs_base;
tval += next_eip;
if (dflag == MO_16) {
tval &= 0xffff;
} else if (!CODE64(s)) {
tval &= 0xffffffff;
}
tcg_gen_movi_tl(cpu_T0, next_eip);
gen_push_v(s, cpu_T0);
gen_bnd_jmp(s);
gen_jmp(s, tval);
}
break;

case 0xe8的情况下,加入是指令 8048902: call 80489d2,则它的机器指令为 E8 CB 00 00 00。 根据当前架构获得 tval = 0x000000cb, tcg_gen_movi_tl(cpu_T0, next_eip); 将下条指令保存到 cpu_T0, gen_push_T0(s) 将当前 CPU上 下文地址压入T0,保存调用前环境。
gen_jmp(s, tval) 跳转到目标函数入口地址。

×

纯属好玩

扫码支持
扫码打赏,你说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦

文章目录
  1. 1. 概览
  2. 2. 代码结构
  3. 3. TCG 执行流程
    1. 3.1. 前端实现
    2. 3.2. 后端实现
  4. 4. TB Cache
  5. 5. 相关数据结构
    1. 5.1. TCGContext
    2. 5.2. DisasContext
    3. 5.3. CPUState
    4. 5.4. image_info
    5. 5.5. TranslationBlock
  6. 6. 部分源码分析
    1. 6.1. linux-user/main.c main
    2. 6.2. linux-user/main.c tcg_exec_init
    3. 6.3. linux-user/main.c cpu_loop
    4. 6.4. cpu-exec.c cpu_exec
    5. 6.5. cpu-exec.c tb_find_fast
    6. 6.6. cpu-exec.c tb_find_slow
    7. 6.7. translate-all.c tb_gen_code
    8. 6.8. target-i386/translate.c gen_intermediate_code
    9. 6.9. target-i386/translate.c disas_insn
,