Linux/ARM Page Table Entry 属性设置分析
注:该博文写的比较凌乱,需要以后再修改。
Linux 的页表项总体可以划分为两部分,一部分是物理映射地址,另一部分是该物理地址对应的访问属性。在Linux/ARM的Kernel中,为各式各样的内存属性预定义了以下条目:
198 static struct mem_type mem_types[] = {
199 [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
200 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
201 L_PTE_SHARED,
202 .prot_l1 = PMD_TYPE_TABLE,
203 .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
204 .domain = DOMAIN_IO,
205 },
206 [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
207 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
208 .prot_l1 = PMD_TYPE_TABLE,
209 .prot_sect = PROT_SECT_DEVICE,
210 .domain = DOMAIN_IO,
211 },
212 [MT_DEVICE_CACHED] = { /* ioremap_cached */
213 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
214 .prot_l1 = PMD_TYPE_TABLE,
215 .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
216 .domain = DOMAIN_IO,
217 },
218 [MT_DEVICE_WC] = { /* ioremap_wc */
219 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
220 .prot_l1 = PMD_TYPE_TABLE,
221 .prot_sect = PROT_SECT_DEVICE,
222 .domain = DOMAIN_IO,
223 },
224 [MT_UNCACHED] = {
225 .prot_pte = PROT_PTE_DEVICE,
226 .prot_l1 = PMD_TYPE_TABLE,
227 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
228 .domain = DOMAIN_IO,
229 },
230 [MT_CACHECLEAN] = {
231 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
232 .domain = DOMAIN_KERNEL,
233 },
234 #ifndef CONFIG_ARM_LPAE
235 [MT_MINICLEAN] = {
236 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
237 .domain = DOMAIN_KERNEL,
238 },
239 #endif
240 [MT_LOW_VECTORS] = {
241 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
242 L_PTE_RDONLY,
243 .prot_l1 = PMD_TYPE_TABLE,
244 .domain = DOMAIN_USER,
245 },
246 [MT_HIGH_VECTORS] = {
247 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
248 L_PTE_USER | L_PTE_RDONLY,
249 .prot_l1 = PMD_TYPE_TABLE,
250 .domain = DOMAIN_USER,
251 },
252 [MT_MEMORY] = {
253 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
254 .prot_l1 = PMD_TYPE_TABLE,
255 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
256 .domain = DOMAIN_KERNEL,
257 },
258 [MT_ROM] = {
259 .prot_sect = PMD_TYPE_SECT,
260 .domain = DOMAIN_KERNEL,
261 },
262 [MT_MEMORY_NONCACHED] = {
263 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
264 L_PTE_MT_BUFFERABLE,
265 .prot_l1 = PMD_TYPE_TABLE,
266 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
267 .domain = DOMAIN_KERNEL,
268 },
269 [MT_MEMORY_DTCM] = {
270 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
271 L_PTE_XN,
272 .prot_l1 = PMD_TYPE_TABLE,
273 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
274 .domain = DOMAIN_KERNEL,
275 },
276 [MT_MEMORY_ITCM] = {
277 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
278 .prot_l1 = PMD_TYPE_TABLE,
279 .domain = DOMAIN_KERNEL,
280 },
281 [MT_MEMORY_SO] = {
282 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
283 L_PTE_MT_UNCACHED,
284 .prot_l1 = PMD_TYPE_TABLE,
285 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
286 PMD_SECT_UNCACHED | PMD_SECT_XN,
287 .domain = DOMAIN_KERNEL,
288 },
289 };
在以后的建立和维护页表中,Linux Kernel只是在以上各种类型的基础上添加或者删除一些属性使用。比如252~257行的MT_MEMORY属性,在初始化的时候,还会在build_mem_type_table()函数中根据处理器架构的不同,“Adjust the PMD section entries according to the CPU in use”,其函数关键代码为:
paging_init->build_mem_type_table
300 static void __init build_mem_type_table(void)
301 {
302 struct cachepolicy *cp;
303 unsigned int cr = get_cr();//System control register
304 pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
305 int cpu_arch = cpu_architecture();
306 int i;
307
...
322 if (is_smp())
323 cachepolicy = CPOLICY_WRITEALLOC;// write-allocate: on a write miss, the line is first allocated (fetched) into the cache, then the write is performed.
...
411 /*
412 * Now deal with the memory-type mappings
413 */
414 cp = &cache_policies[cachepolicy];
415 vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
...
436 if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
...
447 if (is_smp()) {
448 /*
449 * Mark memory with the "shared" attribute
450 * for SMP systems
451 */
452 user_pgprot |= L_PTE_SHARED;
453 kern_pgprot |= L_PTE_SHARED;
454 vecs_pgprot |= L_PTE_SHARED;
455 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
456 mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
457 mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
458 mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
459 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
460 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
461 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
462 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
463 }
464 }
...
470 if (cpu_arch >= CPU_ARCH_ARMv6) {
...
480 } else {
481 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;//
482 }
483
...
496 for (i = 0; i < 16; i++) {
497 unsigned long v = pgprot_val(protection_map[i]);
498 protection_map[i] = __pgprot(v | user_pgprot);
499 }
500
501 mem_types[MT_LOW_VECTORS]. prot_pte |= vecs_pgprot;
502 mem_types[MT_HIGH_VECTORS]. prot_pte |= vecs_pgprot;
503
504 pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
505 pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
506 L_PTE_DIRTY | kern_pgprot);
507
508 mem_types[MT_LOW_VECTORS]. prot_l1 |= ecc_mask;
509 mem_types[MT_HIGH_VECTORS]. prot_l1 |= ecc_mask;
510 mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
511 mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
512 mem_types[MT_MEMORY_NONCACHED] .prot_sect |= ecc_mask;
513 mem_types[MT_ROM].prot_sect |= cp->pmd;
514
515 switch (cp->pmd) {
516 case PMD_SECT_WT:
517 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
518 break;
519 case PMD_SECT_WB:
520 case PMD_SECT_WBWA:
521 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
522 break;
523 }
524 printk("Memory policy: ECC %sabled, Data cache %s\n",
525 ecc_mask ? "en" : "dis", cp->policy);
526
527 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
528 struct mem_type *t = &mem_types[i];
529 if (t->prot_l1)
530 t->prot_l1 |= PMD_DOMAIN(t->domain);
531 if (t->prot_sect)
532 t->prot_sect |= PMD_DOMAIN(t->domain);
533 }
534 }
535
414行cache_policies的相关定义:
68 static struct cachepolicy cache_policies[] __initdata = {
69 {
70 .policy = "uncached",
71 .cr_mask = CR_W|CR_C,
72 .pmd = PMD_SECT_UNCACHED,
73 .pte = L_PTE_MT_UNCACHED,
74 }, {
75 .policy = "buffered",
76 .cr_mask = CR_C,
77 .pmd = PMD_SECT_BUFFERED,
78 .pte = L_PTE_MT_BUFFERABLE,
79 }, {
80 .policy = "writethrough",
81 .cr_mask = 0,
82 .pmd = PMD_SECT_WT,
83 .pte = L_PTE_MT_WRITETHROUGH,
84 }, {
85 .policy = "writeback",
86 .cr_mask = 0,
87 .pmd = PMD_SECT_WB,
88 .pte = L_PTE_MT_WRITEBACK,
89 }, {
90 .policy = "writealloc",
91 .cr_mask = 0,
92 .pmd = PMD_SECT_WBWA,
93 .pte = L_PTE_MT_WRITEALLOC,
94 }
95 };
根据323行的定义,使用了最后一项即 89~94行,所以build_mem_type_table中 415行的:
vecs_pgprot = kern_pgprot = user_pgprot = cp->pte的值为:L_PTE_MT_WRITEALLOC.
ecc_mask: 未知,貌似是用来校验的。
cp->pmd:PMD_SECT_WBWA
529~533行,设置了对应的domain
PMD_DOMAIN(t->domain):
24 #define PMD_DOMAIN(x) (_AT(pmdval_t, (x)) << 5)
关于Domain,Linux Kernel 对Domain的index定义了如下的宏:
arch/arm/include/asm/domain.h
31 #ifndef CONFIG_IO_36
32 #define DOMAIN_KERNEL 0
33 #define DOMAIN_TABLE 0
34 #define DOMAIN_USER 1
35 #define DOMAIN_IO 2
36 #else
37 #define DOMAIN_KERNEL 2
38 #define DOMAIN_TABLE 2
39 #define DOMAIN_USER 1
40 #define DOMAIN_IO 0
41 #endif
42
所以,在未定义CONFIG_IO_36的情况下,这个宏定义的x=DOMAIN_KERNEL=0(若定义了CONFIG_IO_36,则DOMAIN_KERNEL=2)
根据build_mem_type_table的修正,假设在ARM V7的处理器前提下,MT_MEMORY的属性为:
252 [MT_MEMORY] = {
253 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY| L_PTE_MT_WRITEALLOC | L_PTE_SHARED,
254 .prot_l1 = PMD_TYPE_TABLE | PMD_DOMAIN(t->domain),
255 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE| PMD_SECT_WBWA | ecc_mask|0<<5|PMD_SECT_S,
256 .domain = DOMAIN_KERNEL,
257 },
以上是Linux Kernel作为软件,把PTE的属性和L1以及SECT的属性都已经设置完成了。
下面,看一下ARM V7提供的硬件机制:
本文只关注前两排,后面的Supersection不关注。
把上面的MT_MEMORY的属性中的section部分的值单独拿出来:
PMD_TYPE_SECT | PMD_SECT_AP_WRITE| PMD_SECT_WBWA | ecc_mask|0<<5|PMD_SECT_S,
PMD_TYPE_SECT:2<<0
PMD_SECT_AP_WRITE:1<<10
PMD_SECT_WBWA:1<<3 | 1<<2 | 1<<12
0~19对应的二进制:
0 0 0 1 0 001 01 0 0000 0 1 1 10
即:
nG S AP[2] Tex[2:0] AP[1:0] Domain C B
0 1 0 001 01 0000 1 1
AP 001 Privileged access only。即MT_MEMORY的权限为:进程在Kernel Mode下可以任意读写,在User Mode下是不可访问的。
另外,除了访问权限的属性外,还需要提供Shareable, Bufferable, Cacheable的信息。他们的信息由PTE中的TEX位域以及ARM硬件的PRRR和NMRR寄存器提供。
PRRR的全称是:(Primary Region Remap Register);
NMRR的全称是:(Normal Memory Remap Register)
对于ARMV7,Linux Kernel 将其值设置为:
arch/arm/mm/proc-v7-2level.S
PRRR: 0xff0a81a8
NMRR: 0x40e040e0
分析一下PRRR:
PRRR,可以分为4个部分,各个部分共同决定shareable的状态。
0~15位:每两位作为一个unit,说明类型是Normal Memory 还是 Device Memory Strongly-ordered.
16 17位:如果是Device Memory当S=0或者1时,对应的区域是shareable 还是non-shareable
18 19位:如果是Normal Memory 当S=0或者1时,对应的区域是shareable还是non- shareable
20~23 :SBZ
24~31位:每一位(NOSn,即Not Outer Shareable)都是指 Outer Shareable 还是Inner Shareable。当是0,则为Outer Shareable;当是1,则为Inner Shareable。
在Linux中 PRRR的24~31位都是0b1,说明都是inner Shareable.
若某块cache是inner shareable的,那么该cache所在的inner domain的observers对该cache的访问是transparent或者coherent。 这里transparent的意思是:observers可以对该cache随便access,而不会发生数据不一致的情况。如果某块cache不是inner shareable,则一定是outer shareable(虽然ARM还定义了non-shareable,但软件一般不利用这个概念).
对照上文Tex[0]:1 C:1 B:1 决定n为7(其映射表实际上也是依据二进制转化为十进制来决定的,即n = {TEX[0], C, B} = 0b111 = 7,n的取值范围为0~7)
所以,此段Memory地址的类型由PRRR的[15:14]位决定。PRRR中对应位的值是0b10,所以为Normal Memory;又由于其S=1,所以看第[19]位,19位是1,说明是Shareable的;又[31]位是1,说明是Inner Shareable。
因此,Section默认的cache状态:Inner Shareable, Normal Memory
下面分析一下NMRR:
NMRR分为两部分0~15 16~31.
16~31: Outer Cacheable property
15~0:Inner Cacheable property
每两位作一个unit,其值代表的意义
00 Region is Non-cacheable
01 Region is Write-Back, WriteAllocate
10 Region is WriteThrough, Non-WriteAllocate
11 Region is Write-Back, Non-WriteAllocate.
在Linux中,其默认的值为0x40e040e0,前16位和 后16位是一样的。
对于Section PGD的值,其默认的Tex[0]:1 C:1 B:1,因此:n=7。
所以outer cacheable的property 看31:30位,inner cacheable的property 看15:14位。
其值都为0b01,所以outer cacheable 和 Inner cacheable都是WBWA.
综上,对于section的映射,其属性 outer and inner cache 都是WBWA,而且inner cache 是shareable的。
下面看一下PTE的属性
Pte的属性设置为:
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY| L_PTE_MT_WRITEALLOC | L_PTE_SHARED,
再参考(真正做的操作是黑色粗体部分):
76 ENTRY(cpu_v7_set_pte_ext)
77 #ifdef CONFIG_MMU
78 str r1, [r0] @ linux version
79
80 bic r3, r1, #0x000003f0
81 bic r3, r3, #PTE_TYPE_MASK
82 orr r3, r3, r2
83 orr r3, r3, #PTE_EXT_AP0 | 2
84
85 tst r1, #1 << 4
86 orrne r3, r3, #PTE_EXT_TEX(1)
87
88 eor r1, r1, #L_PTE_DIRTY
89 tst r1, #L_PTE_RDONLY | L_PTE_DIRTY
90 orrne r3, r3, #PTE_EXT_APX
91
92 tst r1, #L_PTE_USER
93 orrne r3, r3, #PTE_EXT_AP1
94 #ifdef CONFIG_CPU_USE_DOMAINS
95 @ allow kernel read/write access to read-only user pages
96 tstne r3, #PTE_EXT_APX
97 bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
98 #endif
99
100 tst r1, #L_PTE_XN
101 orrne r3, r3, #PTE_EXT_XN
102
103 tst r1, #L_PTE_YOUNG
104 tstne r1, #L_PTE_PRESENT
105 moveq r3, #0
注意一点:
88~90行:如果Linux版本的pte没有被设置为dirty(实际上,可以当作write),APX位一定会被置位的。这样AP[2:1:0]的值为101,所带来的结果是,Privileged read-only. user permissions No access.
此时,若Linux版本的Pte被设置为L_PTE_USER,则AP[2:1:0]的值为:111,其效应是:Privileged and User read-only(这个只对ARM V7有效。在V6中是reserved的)。若88~90行的APX没有被置位,则AP[2:1:0]的值为:011,其效应是:Full access。
所以AP[2...0]的设置情况是:
1.AP[0]一定置1.
2.若L_PTE_USER设置了,则AP[1]被设置。
3.若L_PTE_DIRTY不设置,则AP[2](即APX)必为1;另外,若设置了L_PTE_RDONLY,则AP[2]同样会置1。若同时设置了L_PTE_DIRTY和L_PTE_RDONLY,AP[2]也会置1(只是这样的话,貌似互相矛盾了,不过,在设置的时候,还是设置了RDONLY的)
AP对应的权限表为:
| AP[2..0] | Privileged | User | Notes |
|----------|------------|------|-------|
| 000 | No Access | No Access | |
| 001 | RW | No Access | |
| 010 | RW | R | |
| 011 | RW | RW | |
| 100 | - | - | Reserved |
| 101 | R | No Access | |
| 110 | R | R | Deprecated |
| 111 | R | R | |
最终:
nG S AP[2] TEX[2:0] | AP[1:0] | C | B| 1 | XN
0 1 0 001 01 1 1 1 0
AP[2:1:0]: 001 Privileged access only
Cache的属性:n=7,所以,查PRRR和NMRR:
其属性 outer and inner cache 都是WBWA,而且inner cache 是shareable的(和Section相同)
注1:添加于2013.09.13
评论
发表评论