RISCV Linux kernel 启动代码分析之六:setup_vm_final分析
一
.
前言
前面我们分析了
setup_vm
以及
relocate
,并详细手算了对应的页表了解了此时页表映射了哪些虚拟地址到哪个物理地址。现在继续来看
setup_vm_final
该函数实现最终的映射。
二.
分析过程
该函数调用路径如下
head.S
的
relocate
之后
tail start_kernel->
start_kernel
(init/main.c)->
setup_arch
(arch/riscv/kernel/setup.c)->
paging_init
(arch/riscv/mm/init.c)->
setup_vm_final(arch/riscv/mm/init.c)
实现如下
/*
 * setup_vm_final() - populate swapper_pg_dir and make it the active page table.
 *
 * Steps, in the order they appear below:
 *   1. point pt_ops at the *_fixmap helpers;
 *   2. hook fixmap_pgd_next into swapper_pg_dir at FIXADDR_START;
 *   3. map every range yielded by for_each_mem_range() into swapper_pg_dir;
 *   4. clear the FIX_PTE and FIX_PMD fixmap slots;
 *   5. write swapper_pg_dir into CSR_SATP and flush the local TLB;
 *   6. point pt_ops at the *_late helpers.
 *
 * Takes no arguments and returns nothing.
 */
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
phys_addr_t pa, start, end;
u64 i;
/**
* MMU is enabled at this point. But page table setup is not complete yet.
* fixmap page table alloc functions should be used at this point
*/
pt_ops.alloc_pte = alloc_pte_fixmap;
pt_ops.get_pte_virt = get_pte_virt_fixmap;
pt_ops.alloc_pmd = alloc_pmd_fixmap;
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */
for_each_mem_range(i, &start, &end) {
/* Stop iterating at the first empty or inverted range. */
if (start >= end)
break;
/*
 * If this range contains the physical address of PAGE_OFFSET,
 * start mapping from that address instead of the range start.
 */
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
/* One step size is chosen per range and reused for every mapping in it. */
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
/* Each chunk is mapped at the virtual address __va() gives for pa. */
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
map_size, PAGE_KERNEL_EXEC);
}
}
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
/* Move to swapper page table */
/* SATP value: page frame number of swapper_pg_dir ORed with SATP_MODE. */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
/* generic page allocation functions must be used to setup page table */
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
}
2.1
接口设置
开始设置接口
/**
* MMU is enabled at this point. But page table setup is not complete yet.
* fixmap page table alloc functions should be used at this point
*/
pt_ops.alloc_pte = alloc_pte_fixmap;
pt_ops.get_pte_virt = get_pte_virt_fixmap;
pt_ops.alloc_pmd = alloc_pmd_fixmap;
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
最后设置接口
/* generic page allocation functions must be used to setup page table */
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
2.2
配置根页表
swapper_pg_dir
首先配置根页表
swapper_pg_dir
等下会从
early_pg_dir
切换到该页表
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
此时参数为
和
setup_vm
时一样,
swapper_pg_dir
的
315
条目指向下一级
fixmap_pmd
执行完后
GDB
查看如下,
[315]
位置的条目对应
fixmap_pmd
(gdb) p /x swapper_pg_dir
$1 = {{pgd = 0x0} <repeats 315 times>, {pgd = 0x2075e801}, {pgd = 0x0} <repeats 196 times>}
(gdb)
2.3
映射
bank
/* Map all memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
map_size, PAGE_KERNEL_EXEC);
}
}
for_each_mem_range
遍历所有块映射。
第一次,此时映射的范围是
0x80200000~0x88000000
按照
2MB
单位进行映射
此时
alloc_pgd_next
pt_ops.alloc_pmd(__va)
pt_ops.alloc_pmd = alloc_pmd_fixmap;
/*
 * alloc_pmd_fixmap() - obtain memory for a new PMD table from memblock.
 * @va: not used by this implementation.
 *
 * Returns the physical address handed back by memblock_phys_alloc().
 * Both arguments passed to it are PAGE_SIZE (presumably size and
 * alignment - confirm against the memblock API).
 */
static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
动态分配的
pmd
地址为
0x87fff000
sz
为
2M
条目值为
0x21fffc01
0x87fff000
转为虚拟地址
nextp=0xffffffcefeffe000
继续下一级
pmd
条目的配置,指向对应的
2MB
物理地址
然后继续按
2MB
配置,直到
0x88000000
动态分配的
pmd
物理地址为
0x87fff000
虚拟地址计算接口如下
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
/*
 * get_pmd_virt_fixmap() - make the PMD table at physical address @pa
 * addressable through the FIX_PMD fixmap slot.
 * @pa: physical address of the PMD table.
 *
 * The previous FIX_PMD mapping is cleared first, then @pa is mapped there.
 * Returns the value of set_fixmap_offset(FIX_PMD, pa) cast to pmd_t *.
 */
static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
clear_fixmap(FIX_PMD);
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}
__set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL)
/* Return a pointer with offset calculated */
({ \
unsigned long ________addr; \
__set_fixmap(idx, phys, flags); \
________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
________addr; \
})
fix_to_virt
/*
 * fix_to_virt() - return the virtual address of fixmap slot @idx.
 * @idx: fixmap slot index; BUILD_BUG_ON() rejects values that are
 *       >= __end_of_fixed_addresses.
 *
 * The address itself is computed by __fix_to_virt().
 */
static __always_inline unsigned long fix_to_virt(const unsigned int idx)
{
BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
return __fix_to_virt(idx);
}
其中
/*
 * __set_fixmap() - install or remove the fixmap PTE for slot @idx.
 * @idx:  fixmap slot; BUG_ON() fires unless FIX_HOLE < idx <
 *        __end_of_fixed_addresses.
 * @phys: physical address to map; only used when @prot is non-zero.
 * @prot: page protection bits; pgprot_val(prot) == 0 means "clear the entry".
 *
 * The entry lives in fixmap_pte[] at pte_index() of the slot's virtual
 * address. The TLB entry for that address is flushed on both paths.
 */
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
ptep = &fixmap_pte[pte_index(addr)];
/* Non-zero protection: map phys; zero protection: clear the entry. */
if (pgprot_val(prot))
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
else
pte_clear(&init_mm, addr, ptep);
local_flush_tlb_page(addr);
}
通过
fixmap_pte
临时映射虚拟地址(对应的地址计算见下),解决此时只能访问虚拟地址、不能直接访问物理地址的问题
即将
0x87fff000
映射到了
FIX_PMD
对应的页,然后通过该虚拟地址就可以访问该物理地址了。
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
x
为
FIX_PMD=2
FIXADDR_TOP=0xffffffcefee00000+0x200000=0xffffffceff000000
0xffffffceff000000-(2<<12)=0xFFFF FFCE FEFF E000
get_pmd_virt_fixmap
->set_fixmap_offset
此时查看该
pmd
的内容
p /x *(pmd_t (*)[512])(0xffffffcefeffe000)
$38 = {{pmd = 0x200800ef}, {pmd = 0x201000ef}, {pmd = 0x201800ef}, {pmd = 0x202000ef}, {pmd = 0x202800ef}, {pmd = 0x203000ef}, {
pmd = 0x203800ef}, {pmd = 0x204000ef}, {pmd = 0x204800ef}, {pmd = 0x205000ef}, {pmd = 0x205800ef}, {pmd = 0x206000ef}, {
pmd = 0x206800ef}, {pmd = 0x207000ef}, {pmd = 0x207800ef}, {pmd = 0x208000ef}, {pmd = 0x208800ef}, {pmd = 0x209000ef}, {
pmd = 0x209800ef}, {pmd = 0x20a000ef}, {pmd = 0x20a800ef}, {pmd = 0x20b000ef}, {pmd = 0x20b800ef}, {pmd = 0x20c000ef}, {
pmd = 0x20c800ef}, {pmd = 0x20d000ef}, {pmd = 0x20d800ef}, {pmd = 0x20e000ef}, {pmd = 0x20e800ef}, {pmd = 0x20f000ef}, {
pmd = 0x20f800ef}, {pmd = 0x210000ef}, {pmd = 0x210800ef}, {pmd = 0x211000ef}, {pmd = 0x211800ef}, {pmd = 0x212000ef}, {
pmd = 0x212800ef}, {pmd = 0x213000ef}, {pmd = 0x213800ef}, {pmd = 0x214000ef}, {pmd = 0x214800ef}, {pmd = 0x215000ef}, {
pmd = 0x215800ef}, {pmd = 0x216000ef}, {pmd = 0x216800ef}, {pmd = 0x217000ef}, {pmd = 0x217800ef}, {pmd = 0x218000ef}, {
pmd = 0x218800ef}, {pmd = 0x219000ef}, {pmd = 0x219800ef}, {pmd = 0x21a000ef}, {pmd = 0x21a800ef}, {pmd = 0x21b000ef}, {
pmd = 0x21b800ef}, {pmd = 0x21c000ef}, {pmd = 0x21c800ef}, {pmd = 0x21d000ef}, {pmd = 0x21d800ef}, {pmd = 0x21e000ef}, {
pmd = 0x21e800ef}, {pmd = 0x21f000ef}, {pmd = 0x21f800ef}, {pmd = 0x0} <repeats 449 times>}
(gdb)
以上看到映射了
63
个
2MB
的块,一共
126MB,
刚好是 0x80200000~0x88000000 的范围(126MB)。
2.4
清除
fixmap
的
pte
级别条目
(FIX_PTE
和
FIX_PMD
对应的页
)
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
其中
include/asm-generic/fixmap.h
__set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR)
arch/riscv/mm/init.c
中
/*
 * __set_fixmap() - install or remove the fixmap PTE for slot @idx.
 * @idx:  fixmap slot; BUG_ON() fires unless FIX_HOLE < idx <
 *        __end_of_fixed_addresses.
 * @phys: physical address to map; only used when @prot is non-zero.
 * @prot: page protection bits; pgprot_val(prot) == 0 takes the
 *        pte_clear() path.
 *
 * The entry is fixmap_pte[pte_index(addr)], where addr is the slot's
 * virtual address. local_flush_tlb_page(addr) runs after either path.
 */
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
ptep = &fixmap_pte[pte_index(addr)];
if (pgprot_val(prot))
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
else
/* prot == 0: write an empty PTE into the slot. */
pte_clear(&init_mm, addr, ptep);
local_flush_tlb_page(addr);
}
FIX_PTE=1
FIX_PMD=2
FIX_PTE
和
FIX_PMD
对应如下的页表,
PTE
是
PMD
的后级,所以先清除
PTE
条目,再清除
PMD
条目
对应
__set_fixmap(1,0,0)
__set_fixmap(2,0,0)
clean 前 PMD 下有一个条目
(gdb) p /x fixmap_pmd
$1 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}
(gdb)
Pte
下有一个条目
(gdb) p /x fixmap_pte
$2 = {{pte = 0x0} <repeats 510 times>, {pte = 0x21fffce7}, {pte = 0x0}}
(gdb)
来看
__set_fixmap
实现
unsigned long addr = __fix_to_virt(idx);
include/asm-generic/fixmap.h
中
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
FIXADDR_TOP=0xffffffcefee00000+0x200000
所以
addr
在
PTE
和
PMD
时分别是
0xffffffcefee00000+0x200000-(1<<12)=FFFFFFCEFEFFF000
0xffffffcefee00000+0x200000-(2<<12)=FFFFFFCEFEFFE000
然后检查
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
页索引要在以下范围内
即
arch/riscv/include/asm/fixmap.h
中的枚举
/*
 * Fixmap slot indices. Enumerators are numbered from 0, so FIX_HOLE == 0,
 * FIX_PTE == 1 and FIX_PMD == 2.
 */
enum fixed_addresses {
/* Index 0; __set_fixmap() treats any idx <= FIX_HOLE as a bug. */
FIX_HOLE,
/* Slots passed to clear_fixmap() by setup_vm_final(). */
FIX_PTE,
FIX_PMD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
/* Marks the end of the permanent slots listed above. */
__end_of_permanent_fixed_addresses,
/*
* Temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*/
/* The boot-time range starts right where the permanent slots end. */
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
/* One past the last valid index; used as the upper bound in range checks. */
__end_of_fixed_addresses
};
然后
ptep = &fixmap_pte[pte_index(addr)];
include/linux/pgtable.h
中
PAGE_SHIFT=12
,
PTRS_PER_PTE=512
/*
 * pte_index() - index of @address within a PTE table.
 * @address: virtual address.
 *
 * Returns the address shifted right by PAGE_SHIFT and masked with
 * PTRS_PER_PTE - 1 (the mask form assumes PTRS_PER_PTE is a power of two).
 */
static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
所以
pte_index(
0xFFFFFFCEFEFFF000
)
=511
pte_index(
0xFFFFFFCEFEFFE000
)
=510
继续
prot
为
0
,所以走
pte_clear(&init_mm, addr, ptep);
arch/riscv/include/asm/pgtable.h 中
/*
 * pte_clear() - overwrite the entry at @ptep with an all-zero PTE.
 * @mm:   forwarded unchanged to set_pte_at().
 * @addr: forwarded unchanged to set_pte_at().
 * @ptep: page-table entry to clear.
 */
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
set_pte_at(mm, addr, ptep, __pte(0));
}
arch/riscv/include/asm/pgtable.h
中
/*
 * set_pte_at() - store @pteval into the entry at @ptep.
 * @mm:     not used in this body.
 * @addr:   not used in this body.
 * @ptep:   page-table entry to write.
 * @pteval: new entry value.
 *
 * When the new value is both present and executable, flush_icache_pte()
 * is called before the store.
 */
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
flush_icache_pte(pteval);
set_pte(ptep, pteval);
}
/*
 * set_pte() - plain store of @pteval into *@ptep; nothing else is done here.
 */
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}
即将
ptep
设置为
0.
即
fixmap_pte[
511
]
=0
fixmap_pte[
510
]
=0
最后
local_flush_tlb_page
调用
sfence.vma
刷新
tlb
执行完这两句后,看到原先非零的
fixmap_pte[510]
(FIX_PMD 对应的条目)变为了
0,fixmap_pte 全部为 0.
(gdb) p /x fixmap_pmd
$4 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}
(gdb)
(gdb) p /x fixmap_pte
$5 = {{pte = 0x0} <repeats 512 times>}
(gdb)
2.5
切换页表
然后切换
satp
到
swapper_pg_dir
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
arch/riscv/include/asm/page.h
中
#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
#define __phys_addr_symbol(x) __va_to_pa_nodebug(x)
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
所以
__pa_symbol(swapper_pg_dir)
计算物理地址就是
&swapper_pg_dir-va_pa_offset
对应汇编代码如下
此时
swapper_pg_dir
地址为
0xffffffe001b7e000
对应寄存器
a5
(gdb) p &swapper_pg_dir
$1 = (pgd_t (*)[512]) 0xffffffe001b7e000
(gdb)
变量
va_pa_offset
的值
0xffffffdf7fe00000
为对应寄存器
a4
实际就是
(PAGE_OFFSET-load_pa=0xffffffe000000000-0x80200000).
(gdb) p /x va_pa_offset
$1 = 0xffffffdf7fe00000
(gdb)
计算完后值为
0x81d7e000
即
0xffffffe001b7e000
-
0xffffffdf7fe00000
include/linux/pfn.h
中
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
arch/riscv/include/asm/csr.h
中
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE SATP_MODE_39
所以写入
satp
寄存器的值是
(0x81d7e000>>12)|
0x8000000000000000
=0x8000000000081d7e
然后
local_flush_tlb_all();
即调用
sfence.vma
刷新
tlb
。
arch/riscv/include/asm/tlbflush.h
中
/*
 * Flush one page from local TLB
 *
 * @addr: virtual address whose translation is flushed; it is passed as the
 *        operand of sfence.vma.
 *
 * NOTE(review): the write of (1 << 26) to CSR_SMCIR happens before the
 * fence; this excerpt does not explain that CSR. It looks platform/vendor
 * specific - confirm its meaning against the SoC documentation.
 */
static inline void local_flush_tlb_page(unsigned long addr)
{
csr_write(CSR_SMCIR, 1 << 26);
/* The "memory" clobber keeps the compiler from moving memory accesses across the fence. */
__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
}
三.
设置之后页表
借助上一篇,我们实现了在
mmu
使能后继续使用
GDB
调试,我们可以直接跳到
setup_vm_final
,一步步查看执行过程分析。
hb setup_vm_final
打断点到函数入口
C
全速运行到函数处
可以运行完后查看页表信息
(gdb) p &swapper_pg_dir
$1 = (pgd_t (*)[512]) 0xffffffe001b7e000
(gdb)
(gdb) p /x swapper_pg_dir
$3 = {{pgd = 0x0} <repeats 315 times>, {pgd = 0x2075e801}, {pgd = 0x0} <repeats 68 times>, {pgd = 0x21fffc01}, {pgd = 0x0} <repeats 127 times>}
(gdb)
swapper_pg_dir
[315]
指向的正是
fixmap_pmd
(gdb) p &fixmap_pmd
$4 = (pmd_t (*)[512]) 0xffffffe001b7a000
(gdb)
(gdb) p /x fixmap_pmd
$5 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}
(gdb)
fixmap_pmd[503]
指向的正是
fixmap_pte
(gdb) p & fixmap_pte
$6 = (pte_t (*)[512]) 0xffffffe001b7c000
(gdb)
(gdb) p /x fixmap_pte
$7 = {{pte = 0x0} <repeats 512 times>}
(gdb)
fixmap_pte
后没有映射了,前面看到
clean
了。
swapper_pg_dir
[384]
条目值是
((x>>12 ) <<10)| 1=
0x21fffc01
所以反推对应的
pmd
页表地址为
0x87FFF000
。
此处对应的虚拟地址为
0xffffffcefeffe000
PA-VA
的计算方式前面已经分析
通过
FIX_PMD 对应的 fixmap_pte 条目(即 fixmap_pte[510])
映射对应物理地址
0x87fff000
,虚拟地址是
0xffffffcefeffe000
。
p /x *(pmd_t (*)[512])(0xffffffcefeffe000)
$38 = {{pmd = 0x200800ef}, {pmd = 0x201000ef}, {pmd = 0x201800ef}, {pmd = 0x202000ef}, {pmd = 0x202800ef}, {pmd = 0x203000ef}, {
pmd = 0x203800ef}, {pmd = 0x204000ef}, {pmd = 0x204800ef}, {pmd = 0x205000ef}, {pmd = 0x205800ef}, {pmd = 0x206000ef}, {
pmd = 0x206800ef}, {pmd = 0x207000ef}, {pmd = 0x207800ef}, {pmd = 0x208000ef}, {pmd = 0x208800ef}, {pmd = 0x209000ef}, {
pmd = 0x209800ef}, {pmd = 0x20a000ef}, {pmd = 0x20a800ef}, {pmd = 0x20b000ef}, {pmd = 0x20b800ef}, {pmd = 0x20c000ef}, {
pmd = 0x20c800ef}, {pmd = 0x20d000ef}, {pmd = 0x20d800ef}, {pmd = 0x20e000ef}, {pmd = 0x20e800ef}, {pmd = 0x20f000ef}, {
pmd = 0x20f800ef}, {pmd = 0x210000ef}, {pmd = 0x210800ef}, {pmd = 0x211000ef}, {pmd = 0x211800ef}, {pmd = 0x212000ef}, {
pmd = 0x212800ef}, {pmd = 0x213000ef}, {pmd = 0x213800ef}, {pmd = 0x214000ef}, {pmd = 0x214800ef}, {pmd = 0x215000ef}, {
pmd = 0x215800ef}, {pmd = 0x216000ef}, {pmd = 0x216800ef}, {pmd = 0x217000ef}, {pmd = 0x217800ef}, {pmd = 0x218000ef}, {
pmd = 0x218800ef}, {pmd = 0x219000ef}, {pmd = 0x219800ef}, {pmd = 0x21a000ef}, {pmd = 0x21a800ef}, {pmd = 0x21b000ef}, {
pmd = 0x21b800ef}, {pmd = 0x21c000ef}, {pmd = 0x21c800ef}, {pmd = 0x21d000ef}, {pmd = 0x21d800ef}, {pmd = 0x21e000ef}, {
pmd = 0x21e800ef}, {pmd = 0x21f000ef}, {pmd = 0x21f800ef}, {pmd = 0x0} <repeats 449 times>}
(gdb)
可以看到最终页表如下
其中
pmd_t xxx_pmd[PTRS_PER_PMD]
0x87fff000
是动态分配出来的
PMD
该
PMD
要通过虚拟地址访问,则需要先对其进行映射,这是通过
pmd_t fixmap_pmd[PTRS_PER_PMD]
下的
pte_t fixmap_pte[PTRS_PER_PTE]
映射一个
4KB
的页来实现的,这个映射是临时的,访问完
xxx_pmd
即可
clean
。
四.
总结
setup_vm_final
最终切换到了
swapper_pg_dir
这个页表,映射了
PAGE_OFFSET
即
0xffffffe000000000
开始的
126MB
到
0x80200000
开始的
126MB
物理地址。
进行上述映射时动态分配了
xxx_pmd
,需要访问该页表;但此时已经使能了
MMU
,不能直接访问物理地址,所以需要先借助
fixmap_pmd->fixmap_pte
临时映射出
xxx_pmd
物理地址对应的虚拟地址,以便设置
xxx_pmd
的内容。这就是
fixmap_pmd/pte
的作用。