本帖最后由 cruelfox 于 2021-6-12 22:36 编辑
用仿真耗时和"Total cycle_count"表达的仿真过程主时钟周期数进行折算,相当于 E203 SOC 的仿真运行频率只有 800Hz 左右。
这样的仿真虽然耗时,但因为可以查看每个信号的状态,适合查错用。上面我试的仿真内容是验证 add 指令,其测试代码(RISC-V指令)是用汇编语言写的,而且用了宏定义,故不熟悉指令架构就难以明白其意图。例如,这是汇编文件片段:
#-------------------------------------------------------------
# Arithmetic tests
#-------------------------------------------------------------
TEST_RR_OP( 2, add, 0x00000000, 0x00000000, 0x00000000 );
TEST_RR_OP( 3, add, 0x00000002, 0x00000001, 0x00000001 );
TEST_RR_OP( 4, add, 0x0000000a, 0x00000003, 0x00000007 );
TEST_RR_OP( 5, add, 0xffffffffffff8000, 0x0000000000000000, 0xffffffffffff8000 );
TEST_RR_OP( 6, add, 0xffffffff80000000, 0xffffffff80000000, 0x00000000 );
TEST_RR_OP( 7, add, 0xffffffff7fff8000, 0xffffffff80000000, 0xffffffffffff8000 );
TEST_RR_OP( 8, add, 0x0000000000007fff, 0x0000000000000000, 0x0000000000007fff );
TEST_RR_OP( 9, add, 0x000000007fffffff, 0x000000007fffffff, 0x0000000000000000 );
TEST_RR_OP( 10, add, 0x0000000080007ffe, 0x000000007fffffff, 0x0000000000007fff );
TEST_RR_OP( 11, add, 0xffffffff80007fff, 0xffffffff80000000, 0x0000000000007fff );
TEST_RR_OP( 12, add, 0x000000007fff7fff, 0x000000007fffffff, 0xffffffffffff8000 );
TEST_RR_OP( 13, add, 0xffffffffffffffff, 0x0000000000000000, 0xffffffffffffffff );
TEST_RR_OP( 14, add, 0x0000000000000000, 0xffffffffffffffff, 0x0000000000000001 );
TEST_RR_OP( 15, add, 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff );
generated 目录下生成的文件包括:仿真用的 Verilog 存储器映像文件(作用相当于单片机烧写用的 .hex 文件)、编译得到的 .elf 文件,以及反汇编 ELF 文件的结果 *.dump。
看一下 .dump 文件的内容(很长,这里无法全部贴出,只摘录一部分):
Disassembly of section .text.init:
80000000 <_start>:
80000000: aa0d j 80000132 <reset_vector>
80000002: 0001 nop
80000004 <trap_vector>:
80000004: 34051073 csrw mscratch,a0
80000008: 00002517 auipc a0,0x2
8000000c: ff850513 addi a0,a0,-8 # 80002000 <test_trap_data>
80000010: 01e52023 sw t5,0(a0)
80000014: 01f52223 sw t6,4(a0)
80000018: 34202f73 csrr t5,mcause
8000001c: 040f4163 bltz t5,8000005e <other_interrupts>
80000020: 4fa1 li t6,8
80000022: 07ff0263 beq t5,t6,80000086 <write_tohost>
80000026: 4fa5 li t6,9
80000028: 05ff0f63 beq t5,t6,80000086 <write_tohost>
8000002c: 4fad li t6,11
8000002e: 05ff0c63 beq t5,t6,80000086 <write_tohost>
80000032: 4f85 li t6,1
80000034: 0bff0563 beq t5,t6,800000de <ifetch_error_handler>
80000038: 4f95 li t6,5
8000003a: 0dff0063 beq t5,t6,800000fa <load_error_handler>
8000003e: 4f9d li t6,7
80000040: 0dff0b63 beq t5,t6,80000116 <store_error_handler>
80000044: 80000f17 auipc t5,0x80000
80000048: fbcf0f13 addi t5,t5,-68 # 0 <_start-0x80000000>
8000004c: 000f0363 beqz t5,80000052 <trap_vector+0x4e>
80000050: 8f02 jr t5
80000052: 34202f73 csrr t5,mcause
80000056: 000f5363 bgez t5,8000005c <handle_exception>
8000005a: a009 j 8000005c <handle_exception>
8000005c <handle_exception>:
8000005c: a01d j 80000082 <other_interrupts+0x24>
8000005e <other_interrupts>:
8000005e: 80000fb7 lui t6,0x80000
80000062: 003f8f93 addi t6,t6,3 # 80000003 <_end+0xffffdff3>
80000066: 05ff0463 beq t5,t6,800000ae <sft_irq_handler>
8000006a: 80000fb7 lui t6,0x80000
8000006e: 007f8f93 addi t6,t6,7 # 80000007 <_end+0xffffdff7>
80000072: 05ff0a63 beq t5,t6,800000c6 <tmr_irq_handler>
80000076: 80000fb7 lui t6,0x80000
8000007a: 00bf8f93 addi t6,t6,11 # 8000000b <_end+0xffffdffb>
8000007e: 01ff0c63 beq t5,t6,80000096 <ext_irq_handler>
80000082: 5391e193 ori gp,gp,1337
80000086 <write_tohost>:
80000086: 4521 li a0,8
80000088: 30052073 csrs mstatus,a0
8000008c: 00001f17 auipc t5,0x1
80000090: f63f2a23 sw gp,-140(t5) # 80001000 <tohost>
80000094: bfcd j 80000086 <write_tohost>
80000096 <ext_irq_handler>:
80000096: 00002517 auipc a0,0x2
8000009a: f6a50513 addi a0,a0,-150 # 80002000 <test_trap_data>
8000009e: 00052f03 lw t5,0(a0)
800000a2: 00452f83 lw t6,4(a0)
800000a6: 34002573 csrr a0,mscratch
800000aa: 30200073 mret
800000ae <sft_irq_handler>:
800000ae: 00002517 auipc a0,0x2
800000b2: f5250513 addi a0,a0,-174 # 80002000 <test_trap_data>
800000b6: 00052f03 lw t5,0(a0)
800000ba: 00452f83 lw t6,4(a0)
800000be: 34002573 csrr a0,mscratch
800000c2: 30200073 mret
......
80000132 <reset_vector>:
80000132: 00000f13 li t5,0
80000136: 00000f93 li t6,0
8000013a: f1402573 csrr a0,mhartid
8000013e: e101 bnez a0,8000013e <reset_vector+0xc>
80000140: 4181 li gp,0
80000142: 00000297 auipc t0,0x0
80000146: ec228293 addi t0,t0,-318 # 80000004 <trap_vector>
8000014a: 4521 li a0,8
8000014c: 30052073 csrs mstatus,a0
80000150: fff00513 li a0,-1
80000154: 30452073 csrs mie,a0
80000158: 30529073 csrw mtvec,t0
8000015c <post_mtvec>:
8000015c: 80000297 auipc t0,0x80000
80000160: ea428293 addi t0,t0,-348 # 0 <_start-0x80000000>
80000164: 00028e63 beqz t0,80000180 <post_mtvec+0x24>
80000168: 10529073 csrw stvec,t0
8000016c: 0000b2b7 lui t0,0xb
80000170: 10928293 addi t0,t0,265 # b109 <_start-0x7fff4ef7>
80000174: 30229073 csrw medeleg,t0
80000178: 30202373 csrr t1,medeleg
8000017c: ee6290e3 bne t0,t1,8000005c <handle_exception>
80000180: 30005073 csrwi mstatus,0
80000184: 08000513 li a0,128
80000188: 30052073 csrs mstatus,a0
8000018c: 4501 li a0,0
8000018e: bfc51073 csrw 0xbfc,a0
80000192: 0000100f fence.i
80000196: 000012b7 lui t0,0x1
8000019a: a0028293 addi t0,t0,-1536 # a00 <_start-0x7ffff600>
8000019e <waitloop1>:
8000019e: 12fd addi t0,t0,-1
800001a0: fe029fe3 bnez t0,8000019e <waitloop1>
800001a4: 100083b7 lui t2,0x10008
800001a8: 00838393 addi t2,t2,8 # 10008008 <_start-0x6fff7ff8>
800001ac: 0003a283 lw t0,0(t2)
800001b0: 00040337 lui t1,0x40
800001b4: fff34313 not t1,t1
800001b8: 0062f2b3 and t0,t0,t1
800001bc: 0053a023 sw t0,0(t2)
800001c0: 40000293 li t0,1024
800001c4 <waitloop2>:
800001c4: 12fd addi t0,t0,-1
800001c6: 0003ae03 lw t3,0(t2)
800001ca: fe029de3 bnez t0,800001c4 <waitloop2>
800001ce: 0003a283 lw t0,0(t2)
800001d2: 00040337 lui t1,0x40
800001d6: 0062e2b3 or t0,t0,t1
800001da: 0053a023 sw t0,0(t2)
......
80000262 <test_2>:
80000262: 4081 li ra,0
80000264: 4101 li sp,0
80000266: 00208f33 add t5,ra,sp
8000026a: 4e81 li t4,0
8000026c: 4189 li gp,2
8000026e: 37df1d63 bne t5,t4,800005e8 <fail>
80000272 <test_3>:
80000272: 4085 li ra,1
80000274: 4105 li sp,1
80000276: 00208f33 add t5,ra,sp
8000027a: 4e89 li t4,2
8000027c: 418d li gp,3
8000027e: 37df1563 bne t5,t4,800005e8 <fail>
80000282 <test_4>:
80000282: 408d li ra,3
80000284: 411d li sp,7
80000286: 00208f33 add t5,ra,sp
8000028a: 4ea9 li t4,10
8000028c: 4191 li gp,4
8000028e: 35df1d63 bne t5,t4,800005e8 <fail>
......
我尚不熟悉RISC-V的指令集,因此大部分都看不懂。
这段代码是直接放到 E203 SOC 的 ITCM 中的,因此一启动就执行了。它包含了中断向量的处理,是一个完整的程序,只是不像我们平时写的 MCU 程序那样需要操作片上外设来实现功能。
想查看仿真的细节,用 Verilog 的 dump 系统任务就可以。在 tb_top.v 中加入下面两行(需放在 initial 块内),将 u_e203_cpu 这个模块实例以及其下级的所有信号导出成 VCD 文件:
$dumpfile("e203test.vcd");
$dumpvars(0, u_e203_soc_top.u_e203_subsys_top.u_e203_subsys_main.u_e203_cpu_top.u_e203_cpu);
再重新做仿真,就会每次都生成 e203test.vcd 这个文件(几百MB,是纯文本文件)。然后可以用 gtkwave 这个软件来看波形了:
这样可以观看 CPU 是如何取指令,如何执行指令等等的每一步状态变化,可以结合源代码阅读,帮助理解E203 RISC-V CPU的设计。
既然 E203 SOC 整体都被仿真了,写程序控制 GPIO、UART 也是可以做到的,并且能从仿真波形中观察到结果。只是,那类程序更适合用 FPGA 在实际电路中验证。
|