void __init paging_init() - 커널 페이지 초기화 하기

[분석 기준]

kernel version : linux kernel 4.16

architecture : aarch64 (arm64)

커널용 페이지 테이블 초기화 하는 함수입니다. 전체적인 내용을 분석 후 각 주요 함수에 대해 아래에 연속적으로 분석해 보도록 하겠습니다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
 * paging_init() - build the final kernel page tables and install them.
 *
 * The kernel image and memblock memory mappings are first created in a
 * temporary pgd page obtained from early_pgtable_alloc(). The top-level
 * table is then copied into swapper_pg_dir, and both the temporary page
 * and the part of the swapper_pg_dir region beyond its first page are
 * handed back to memblock.
 */
void __init paging_init(void)
{
    /* Temporary pgd page; it is accessed through the pgd fixmap slot. */
    phys_addr_t pgd_phys = early_pgtable_alloc();
    pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
 
    /* Populate the temporary pgd: kernel image first, then memblock memory. */
    map_kernel(pgdp);
    map_mem(pgdp);
 
    /*
     * We want to reuse the original swapper_pg_dir so we don't have to
     * communicate the new address to non-coherent secondaries in
     * secondary_entry, and so cpu_switch_mm can generate the address with
     * adrp+add rather than a load from some global variable.
     *
     * To do this we need to go via a temporary pgd.
     */
    /* Step 1: run on the temporary pgd while swapper_pg_dir is rewritten. */
    cpu_replace_ttbr1(__va(pgd_phys));
    /* Step 2: copy only the top-level table (PGD_SIZE bytes). */
    memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
    /* Step 3: switch back to the now up-to-date swapper_pg_dir. */
    cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
    /* The temporary pgd is no longer needed: unmap it and free its page. */
    pgd_clear_fixmap();
    memblock_free(pgd_phys, PAGE_SIZE);
    /*
     * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
     * allocated with it.
     */
    /* Free everything in [swapper_pg_dir + PAGE_SIZE, swapper_pg_end). */
    memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
              __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
              - PAGE_SIZE);
}
Colored by Color Scripter
cs

line 3

early_pgtable_alloc() 함수는 페이지 테이블 용도로 사용할 하나의 페이지를 할당하여 0으로 초기화 한 후 물리 주소를 반환하는 함수입니다. 초기화를 위해 fixmap(FIX_PTE)에 임시로 매핑했던 영역은 초기화 후 매핑만 해제되며, 할당된 페이지 자체는 해제되지 않고 그대로 반환됩니다.

line 6 ~ 7

커널 이미지의 각 영역(map_kernel)과 memblock에 등록된 각 메모리 영역(map_mem)을 pgd에 매핑합니다. 이 때 매핑되는 영역은 커널과 메모리 영역입니다. 매핑 시 임시로 할당받아 fixmap 영역에 매핑해 둔 pgd 페이지에 매핑하게 됩니다.

line 17

커널용 페이지 테이블이 준비되었으므로 TTBR1이 커널용 페이지 테이블을 가리키도록 합니다.

line 18

준비한 커널 페이지 테이블을 swapper_pg_dir로 PGD_SIZE 만큼 복사합니다.

line 19

원래의 커널 페이지 테이블인 swapper_pg_dir을 다시 TTBR1 레지스터에 설정해줍니다.

line 21

fixmap의 pgd 페이지를 언매핑하고 임시로 할당한 커널용 pgd 테이블을 해제합니다. fixmap 영역을 모두 해제하는 것이 아닌 pgd영역을 clear해줍니다.

line 22

임시로 할당받아 사용하던 하나의 page를 free해줍니다. 내용이 swapper_pg_dir에 이미 복사되었기에 해제합니다.

line 27

커널빌드 시 swapper_pg_dir는 한 페이지가 아닌 페이지 테이블 단계 수 만큼 만들어지므로 추가로 만들어진 swapper_pg_dir의 사용하지 않는 부분은 해제해줍니다.

early_pgtable_alloc() 함수는 페이지 테이블 용도로 사용할 싱글 페이지를 할당하여 0으로 초기화 한 후 물리 주소를 반환하는 함수입니다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * early_pgtable_alloc() - allocate one zeroed page for use as a page table.
 *
 * Takes a PAGE_SIZE-aligned page from memblock, clears it through a
 * temporary fixmap mapping and returns the page's physical address.
 */
static phys_addr_t __init early_pgtable_alloc(void)
{
    phys_addr_t table_pa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
    void *table_va;

    /*
     * The FIX_{PGD,PUD,PMD} slots may already be in use by the caller,
     * whereas the FIX_PTE slot is free at this point, so it is borrowed
     * as a scratch window to initialise a table of any level.
     */
    table_va = pte_set_fixmap(table_pa);
    memset(table_va, 0, PAGE_SIZE);

    /*
     * Tearing the mapping down implies barriers, which also make the
     * zeroed page visible to the page table walker.
     */
    pte_clear_fixmap();

    return table_pa;
}
 
Colored by Color Scripter
cs

line 5

커널 페이지 테이블로 사용할 하나의 페이지를 memblock으로 부터 할당받는다.

line 11

memblock으로부터 할당받은 페이지 물리주소를 전달하여 fixmap의 FIX_PTE주소에 매핑한 후 해당페이지를 0으로 클리어하고 다시 매핑을 해제합니다. FIX_PTE는 커널 페이지 테이블에 대한 수정이 필요할 때 런타임 중에 사용하는 fixmap 영역입니다.

line 12

매핑된 영역을 0으로 모두 초기화 합니다.

map_kernel() 함수에서는 커널 이미지의 코드 및 데이터 영역(text, rodata, inittext, initdata, data)을 매핑하고, fixmap 영역을 새 페이지 테이블에 연결합니다. (메모리 영역의 매핑은 map_mem() 함수가 담당합니다.) 매핑하는 영역은 fixmap영역의 pgd 페이지에 매핑하고 수정하는 과정을 거칩니다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
 * map_kernel() - create the kernel image mappings in the table at @pgdp.
 *
 * Maps the text, rodata, inittext, initdata and data segments of the
 * kernel image with map_kernel_segment(), makes the fixmap region
 * reachable from @pgdp, and finally calls kasan_copy_shadow().
 *
 * @pgdp: top-level page table to populate.
 */
static void __init map_kernel(pgd_t *pgdp)
{
    /*
     * One vm_struct per segment, handed to map_kernel_segment().
     * NOTE(review): presumably static because map_kernel_segment() keeps
     * the pointers beyond this call - confirm against its definition.
     */
    static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
                vmlinux_initdata, vmlinux_data;
    /*
     * External debuggers may need to write directly to the text
     * mapping to install SW breakpoints. Allow this (only) when
     * explicitly requested with rodata=off.
     */
    pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
    /*
     * Only rodata will be remapped with different permissions later on,
     * all other segments are allowed to use contiguous mappings.
     */
    /* [_text, _etext): kernel text, using text_prot. */
    map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
               VM_NO_GUARD);
    /* [__start_rodata, __inittext_begin): the only NO_CONT_MAPPINGS segment. */
    map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
               &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
    /* [__inittext_begin, __inittext_end): init text, same protection as text. */
    map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
               &vmlinux_inittext, 0, VM_NO_GUARD);
    /* [__initdata_begin, __initdata_end): init data. */
    map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
               &vmlinux_initdata, 0, VM_NO_GUARD);
    /* [_data, _end): last segment; its final argument is 0, not VM_NO_GUARD. */
    map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
    /*
     * Hook up the fixmap. An empty pgd entry at FIXADDR_START in the new
     * table means the fixmap does not share a pgd entry with the mappings
     * created above.
     */
    if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
        /*
         * The fixmap falls in a separate pgd to the kernel, and doesn't
         * live in the carveout for the swapper_pg_dir. We can simply
         * re-use the existing dir for the fixmap.
         */
        set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
            READ_ONCE(*pgd_offset_k(FIXADDR_START)));
    } else if (CONFIG_PGTABLE_LEVELS > 3) {
        /*
         * The fixmap shares its top level pgd entry with the kernel
         * mapping. This can really only occur when we are running
         * with 16k/4 levels, so we can simply reuse the pud level
         * entry instead.
         */
        BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
        pud_populate(&init_mm,
                 pud_set_fixmap_offset(pgdp, FIXADDR_START),
                 lm_alias(bm_pmd));
        pud_clear_fixmap();
    } else {
        /* Shared pgd entry with three or fewer levels is not handled. */
        BUG();
    }
    kasan_copy_shadow(pgdp);
}
Colored by Color Scripter
cs

line 15 ~ 23

커널 이미지의 각 세그먼트를 커널 페이지 타입으로 매핑합니다.

line 24

FIXMAP 영역을 pgdp가 가리키는 page table 영역에 설정합니다.

아래 코드 분석 시 CONFIG_KEXEC_CORE 는 분석하지 않겠습니다.

map_mem() 함수는 memblock에 등록된 각 메모리 영역을 pgd에 매핑하는 함수입니다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
 * map_mem() - map every memblock memory region into the table at @pgdp.
 *
 * All memblock "memory" regions that are not marked NOMAP are mapped with
 * PAGE_KERNEL. The kernel range [_text, __init_begin) and, with
 * CONFIG_KEXEC_CORE, the crash kernel region are temporarily marked NOMAP
 * so the loop skips them; they are then mapped separately with their own
 * mapping flags and the NOMAP mark is removed again.
 *
 * @pgdp: top-level page table to populate.
 */
static void __init map_mem(pgd_t *pgdp)
{
    /* Physical range [kernel_start, kernel_end) of [_text, __init_begin). */
    phys_addr_t kernel_start = __pa_symbol(_text);
    phys_addr_t kernel_end = __pa_symbol(__init_begin);
    struct memblock_region *reg;
    int flags = 0;
 
    /* With debug_pagealloc, request neither block nor contiguous mappings. */
    if (debug_pagealloc_enabled())
        flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
    /*
     * Take care not to create a writable alias for the
     * read-only text and rodata sections of the kernel image.
     * So temporarily mark them as NOMAP to skip mappings in
     * the following for-loop
     */
    memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
    /* The crash kernel region, if any, is likewise skipped by the loop. */
    if (crashk_res.end)
        memblock_mark_nomap(crashk_res.start,
                    resource_size(&crashk_res));
#endif
 
    /* map all the memory banks */
    for_each_memblock(memory, reg) {
        phys_addr_t start = reg->base;
        phys_addr_t end = start + reg->size;
 
        /* An empty (or wrapped) region ends the walk. */
        if (start >= end)
            break;
        /* Skip regions marked NOMAP, including those marked above. */
        if (memblock_is_nomap(reg))
            continue;
 
        __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
    }
 
    /*
     * Map the linear alias of the [_text, __init_begin) interval
     * as non-executable now, and remove the write permission in
     * mark_linear_text_alias_ro() below (which will be called after
     * alternative patching has completed). This makes the contents
     * of the region accessible to subsystems such as hibernate,
     * but protects it from inadvertent modification or execution.
     * Note that contiguous mappings cannot be remapped in this way,
     * so we should avoid them here.
     */
    __map_memblock(pgdp, kernel_start, kernel_end,
               PAGE_KERNEL, NO_CONT_MAPPINGS);
    /* Undo the temporary NOMAP mark on the kernel range. */
    memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
 
#ifdef CONFIG_KEXEC_CORE
    /*
     * Use page-level mappings here so that we can shrink the region
     * in page granularity and put back unused memory to buddy system
     * through /sys/kernel/kexec_crash_size interface.
     */
    if (crashk_res.end) {
        /* crashk_res.end is inclusive, hence the + 1 for the exclusive end. */
        __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                   PAGE_KERNEL,
                   NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
        memblock_clear_nomap(crashk_res.start,
                     resource_size(&crashk_res));
    }
#endif
}
Colored by Color Scripter
cs

line 17

kernel 영역을 nomap으로 설정합니다. 그 이유는 그 아래 for_each_memblock 문에서 nomap 영역을 skip하기 위함입니다.

line 25 ~ 35

memblock의 memory 타입 영역을 pgd에 매핑하기 위함입니다. 전체 memblock memory 타입의 region을 확인하며 하나씩 매핑하게 됩니다.

line 47

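kernel 영역([_text, __init_begin))의 linear alias를 pgd에 매핑합니다. 이 영역은 이후 권한을 바꿔 다시 매핑해야 하는데 contiguous 매핑은 그런 방식으로 다시 매핑할 수 없으므로, NO_CONT_MAPPINGS 플래그를 설정하여 contiguous 매핑을 사용하지 않도록 합니다.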

cpu_replace_ttbr1함수는 안전하게 TTBR1 레지스터가 커널 페이지 테이블을 가리키도록 하기위한 함수입니다.

상세한 사유는 적용된 패치를 살펴보는게 더 빠를것 같습니다. (http://lists.infradead.org/pipermail/linux-arm-kernel/2015-December/392305.html)

TLB conflict 문제를 해결하기 위해 idmap을 사용해서 TTBR0에서 페이지 테이블을 교체해 주는 작업을 해주고 있습니다. idmap_cpu_replace_ttbr1 함수는 idmap에서만 사용할 수 있으므로 TTBR0를 idmap 페이지 테이블로 교체한 후 TTBR0 에서 함수를 호출하도록 합니다. 그 이후 idmap을 해제하여 TTBR0를 원상복구 시켜줍니다. TTBR0는 여기서 오직 idmap을 위해서 사용되어 집니다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * cpu_replace_ttbr1() - switch to the page table at @pgdp via the idmap.
 *
 * idmap_cpu_replace_ttbr1() is invoked through its physical address, so
 * the idmap is installed around the call and removed again afterwards.
 *
 * @pgdp: virtual address of the new top-level page table.
 */
static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
    typedef void (ttbr_replace_func)(phys_addr_t);
    extern ttbr_replace_func idmap_cpu_replace_ttbr1;

    /* The replacement routine takes the table's physical address. */
    phys_addr_t new_table_pa = virt_to_phys(pgdp);

    /* Call target: the physical (identity-mapped) address of the routine. */
    ttbr_replace_func *replace_via_idmap =
        (void *)__pa_symbol(idmap_cpu_replace_ttbr1);

    cpu_install_idmap();
    replace_via_idmap(new_table_pa);
    cpu_uninstall_idmap();
}
Colored by Color Scripter
cs

line 9

idmap_cpu_replace_ttbr1은 idmapping 상태에서만 사용가능하기에 물리주소를 가져온다.

line 11

ttbr0에 idmap page table을 매핑합니다.

line 12

idmap_cpu_replace_ttbr1 함수 호출하여 ttbr1에 인자로 전달받은 pgd를 전달합니다.

line 13

ttbr0에 매핑했던 idmap을 해제합니다.

[참고자료]

http://jake.dothome.co.kr/

도서 : 코드로 알아보는 ARM 리눅스 커널

저작자표시

'Linux > Kernel Analysis' 카테고리의 다른 글

메모리 모델 (FLATMEM, DISCONTIGMEM, SPARSEMEM) (0)	2018.10.06
bootmem_init() 부트 메모리 초기화 (0)	2018.10.06
[커널분석] arm64_memblock_init() (1)	2018.09.15
[ARMv8] aarch64 프로세서 상태 레지스터(PSTATE) (0)	2018.09.08
[커널분석] parse_early_param() (0)	2018.09.08

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

void __init paging_init() - 커널 페이지 초기화 하기

'Linux > Kernel Analysis' 카테고리의 다른 글

티스토리툴바

개인정보

단축키

내 블로그

블로그 게시글

모든 영역