ARM64架构设备树初始化流程详解

ARM64架构设备树初始化流程

初始阶段

arm64架构的head.S中,有这样一句,其中的x21寄存器存放的是FDT的物理地址,它是由uboot、UEFI+grub等bootloader传递过来的

1
str_l	x21, __fdt_pointer, x5		// Save FDT pointer

这里的__fdt_pointer定义在arch/arm64/kernel/setup.c中,__initdata表明这个变量链接时放在初始化数据段

1
2
/* Marks an object for placement in the .init.data section */
#define __initdata	__section(.init.data)
/* Holds the FDT physical address (phys_addr_t); lives in .init.data via __initdata */
phys_addr_t __fdt_pointer __initdata;

此时FDT的物理地址就保存在了__fdt_pointer里面。

由物理地址到虚拟地址

来到setup_arch中,首先相关的第一个函数是setup_machine_fdt,它传入刚才的__fdt_pointer

1
2
3
4
5
/* Excerpt of setup_arch(): only the FDT-related call is shown. */
void __init setup_arch(char **cmdline_p) {
// ... (unrelated code elided)
/* Hand the saved FDT physical address to setup_machine_fdt() */
setup_machine_fdt(__fdt_pointer);
// ... (unrelated code elided)
}

其中,这里的setup_machine_fdt主要是为了将FDT的物理地址映射为内核可访问的虚拟地址,因为此时已经开启了MMU,不能再直接通过物理地址访问这块内存了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Map the FDT located at dt_phys, reserve the memory it occupies and run the
 * early device-tree scan. If the blob cannot be mapped or the scan fails,
 * an error is printed and the CPU spins forever.
 */
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
// Map the FDT's physical address to a virtual address (fixed mapping); the FDT size is stored in size
void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);

// If the mapping succeeded, reserve the memory occupied by the blob
if (dt_virt)
memblock_reserve(dt_phys, size);

// If the mapping failed or the early device-tree scan fails, report it and halt here
if (!dt_virt || !early_init_dt_scan(dt_virt)) {
pr_crit("\n"
"Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n"
"The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
"\nPlease check your bootloader.",
&dt_phys, dt_virt);

while (true)
cpu_relax();
}

/* Early fixups are done, map the FDT as read-only now */
// Remap the same region, this time with read-only protection
fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
// Unrelated to device-tree setup; can be ignored here
dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
}

首先看一下设备树早期扫描函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/*
 * Validate the header of a flattened device tree blob.
 *
 * Returns 0 when the header is acceptable, otherwise a negative FDT_ERR_*
 * code: BADVERSION for an unsupported version range, BADSTATE for a
 * sequential-write blob whose structure block size is zero, and BADMAGIC
 * when the magic matches neither known value.
 */
int fdt_check_header(const void *fdt)
{
	if (fdt_magic(fdt) == FDT_MAGIC) {
		/* Complete tree: both version bounds must be supported */
		if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION ||
		    fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION)
			return -FDT_ERR_BADVERSION;
		return 0;
	}

	if (fdt_magic(fdt) == FDT_SW_MAGIC) {
		/* Unfinished sequential-write blob */
		return fdt_size_dt_struct(fdt) == 0 ? -FDT_ERR_BADSTATE : 0;
	}

	return -FDT_ERR_BADMAGIC;
}

/*
 * Validate the blob at params and record it as the boot device tree.
 * Returns false when params is NULL or fdt_check_header() rejects the
 * header; otherwise stores the pointer in initial_boot_params, records
 * the blob's CRC32 in of_fdt_crc32 and returns true.
 */
bool __init early_init_dt_verify(void *params)
{
if (!params)
return false;

/* check device tree validity */
// Header validity check (magic number and so on)
if (fdt_check_header(params))
return false;

/* Setup flat device-tree pointer */
initial_boot_params = params;
// Compute the CRC32 over the whole blob and save it; nothing is compared against it here
of_fdt_crc32 = crc32_be(~0, initial_boot_params,
fdt_totalsize(initial_boot_params));
return true;
}

/*
 * Verify the blob at params and, if it is valid, run the early node scan.
 *
 * Returns false when early_init_dt_verify() rejects the blob (NULL pointer
 * or bad header), true once early_init_dt_scan_nodes() has been run.
 */
bool __init early_init_dt_scan(void *params)
{
	/* Nothing to scan if the blob did not pass verification */
	if (!early_init_dt_verify(params))
		return false;

	/* Blob accepted: walk the flat tree's early nodes */
	early_init_dt_scan_nodes();
	return true;
}

早期设备树节点的扫描,这一步主要是获得/chosen节点中的信息(例如启动命令行)、根节点的{size,address}-cells,以及内存信息等

1
2
3
4
5
6
7
8
9
10
11
/*
 * Run three of_scan_flat_dt() passes over the flat tree, one per callback:
 * the /chosen node (with boot_command_line as callback data), the root
 * {size,address}-cells, and the memory nodes.
 */
void __init early_init_dt_scan_nodes(void)
{
/* Retrieve various information from the /chosen node */
of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);

/* Initialize {size,address}-cells info */
of_scan_flat_dt(early_init_dt_scan_root, NULL);

/* Setup memory, calling early_init_dt_add_memory_arch */
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
}

之后会来到解压设备树的部分,如果没有开启acpi,那么默认开始将dtb展开,完成将设备树转换为device_node的过程。

1
2
3
4
5
6
/* Excerpt of setup_arch(): only the device-tree expansion step is shown. */
void __init setup_arch(char **cmdline_p) {
// ... (unrelated code elided)
/* The flat device tree is expanded only when acpi_disabled is set */
if (acpi_disabled)
unflatten_device_tree();
// ... (unrelated code elided)
}

那么,解压设备树时需要传入几个参数,这些参数分别定义在哪里呢?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/**
 * unflatten_device_tree - create tree of device_nodes from flat blob
 *
 * unflattens the device-tree passed by the firmware, creating the
 * tree of struct device_node. It also fills the "name" and "type"
 * pointers of the nodes so the normal device-tree walking functions
 * can be used.
 */
void __init unflatten_device_tree(void)
{
/*
 * Expand initial_boot_params with no parent node (NULL), store the
 * resulting tree in of_root, allocate through
 * early_init_dt_alloc_memory_arch and do not mark the tree detached.
 */
__unflatten_device_tree(initial_boot_params, NULL, &of_root,
early_init_dt_alloc_memory_arch, false);

/* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
of_alias_scan(early_init_dt_alloc_memory_arch);
}

这里的initial_boot_params定义在drivers/of/fdt.c中,并且在early_init_dt_verify中被赋予了fdt的虚拟地址值

1
2
3
4
5
6
7
8
/* Pointer to the flat device tree blob; assigned in early_init_dt_verify() */
void *initial_boot_params;
/* Excerpt: only the assignment of initial_boot_params is shown */
bool __init early_init_dt_verify(void *params)
{
// ... (elided)
/* Setup flat device-tree pointer */
initial_boot_params = params;
// ... (elided)
}

of_root则是系统中的根节点

1
/* Root device_node pointer; unflatten_device_tree() passes &of_root as the output of the expansion */
struct device_node *of_root;

接下来是__unflatten_device_tree,这个函数用来实际创建device_node节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/**
 * __unflatten_device_tree - create tree of device_nodes from flat blob
 *
 * unflattens a device-tree, creating the
 * tree of struct device_node. It also fills the "name" and "type"
 * pointers of the nodes so the normal device-tree walking functions
 * can be used.
 * @blob: The blob to expand
 * @dad: Parent device node
 * @mynodes: The device_node tree created by the call
 * @dt_alloc: An allocator that provides a virtual address to memory
 * for the resulting tree
 * @detached: if true and @mynodes is non-NULL, the OF_DETACHED flag is
 * set on *@mynodes after unflattening
 *
 * Returns NULL on failure or the memory chunk containing the unflattened
 * device tree on success.
 */
static void *__unflatten_device_tree(const void *blob,
struct device_node *dad,
struct device_node **mynodes,
void *(*dt_alloc)(u64 size, u64 align),
bool detached)
{
int size;
void *mem;

pr_debug(" -> unflatten_device_tree()\n");

// blob is the (virtual) address of the flat device tree
if (!blob) {
pr_debug("No device tree pointer\n");
return NULL;
}

pr_debug("Unflattening device tree:\n");
pr_debug("magic: %08x\n", fdt_magic(blob));
pr_debug("size: %08x\n", fdt_totalsize(blob));
pr_debug("version: %08x\n", fdt_version(blob));

// Sanity-check the FDT header
if (fdt_check_header(blob)) {
pr_err("Invalid device tree blob header\n");
return NULL;
}

/* First pass, scan for size */
// First call passes NULL for the memory chunk: it only yields the size needed, nothing is expanded yet
size = unflatten_dt_nodes(blob, NULL, dad, NULL);
if (size < 0)
return NULL;

// Round the size up to a multiple of 4
size = ALIGN(size, 4);
pr_debug(" size is %d, allocating...\n", size);

/* Allocate memory for the expanded device tree */
// Allocate through the caller-supplied callback; the extra 4 bytes hold the end marker written below
mem = dt_alloc(size + 4, __alignof__(struct device_node));
if (!mem)
return NULL;

memset(mem, 0, size);

// Place the marker value right after the size bytes of tree data
*(__be32 *)(mem + size) = cpu_to_be32(0xdeadbeef);

pr_debug(" unflattening %p...\n", mem);

/* Second pass, do actual unflattening */
// This call really expands the blob into device_nodes inside mem
unflatten_dt_nodes(blob, mem, dad, mynodes);
// A changed marker means the second pass wrote past the size bytes measured by the first pass
if (be32_to_cpup(mem + size) != 0xdeadbeef)
pr_warning("End of tree marker overwritten: %08x\n",
be32_to_cpup(mem + size));
// When detached is requested and mynodes was supplied, flag the new tree as detached
if (detached && mynodes) {
of_node_set_flag(*mynodes, OF_DETACHED);
pr_debug("unflattened tree is detached\n");
}

pr_debug(" <- unflatten_device_tree()\n");
return mem;
}

可以见到,核心函数是unflatten_dt_nodes,注意,在第一次执行该函数时,只获得其展开后的内存区域大小,这次调用,mem=NULL,dad=NULL,nodepp=NULL;第二次调用时才会传入分配好的mem以及nodepp=&of_root,真正地展开设备树

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/**
 * unflatten_dt_nodes - Alloc and populate a device_node from the flat tree
 * @blob: The parent device tree blob
 * @mem: Memory chunk to use for allocating device nodes and properties;
 *       passing NULL selects a dry run that only measures the size needed
 * @dad: Parent struct device_node
 * @nodepp: The device_node tree created by the call
 *
 * It returns the size of unflattened device tree or error code
 */
static int unflatten_dt_nodes(const void *blob,
			      void *mem,
			      struct device_node *dad,
			      struct device_node **nodepp)
{
	struct device_node *root;
	int offset = 0, depth = 0, initial_depth = 0;
	/* Maximum supported node nesting depth */
#define FDT_MAX_DEPTH	64
	unsigned int fpsizes[FDT_MAX_DEPTH];
	struct device_node *nps[FDT_MAX_DEPTH];
	void *base = mem;
	/*
	 * Without a memory chunk this is a dry run: only the size of the
	 * unflattened tree is computed, no node is actually written.
	 */
	bool dryrun = !base;

	if (nodepp)
		*nodepp = NULL;

	/*
	 * We're unflattening device sub-tree if @dad is valid. There are
	 * possibly multiple nodes in the first level of depth. We need
	 * set @depth to 1 to make fdt_next_node() happy as it bails
	 * immediately when negative @depth is found. Otherwise, the device
	 * nodes except the first one won't be unflattened successfully.
	 */
	if (dad)
		depth = initial_depth = 1;

	/*
	 * Seed the per-depth state: fpsizes[] carries the full-path length
	 * handed to populate_node() for each level, nps[] the node pointer
	 * of each level.
	 */
	root = dad;
	fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
	nps[depth] = dad;

	/* Walk every node of the blob and populate it */
	for (offset = 0;
	     offset >= 0 && depth >= initial_depth;
	     offset = fdt_next_node(blob, offset, &depth)) {
		/*
		 * The body writes slot depth + 1 of fpsizes[] and nps[], so
		 * the last usable depth is FDT_MAX_DEPTH - 2. Comparing
		 * against FDT_MAX_DEPTH alone would let
		 * depth == FDT_MAX_DEPTH - 1 index one element past the end
		 * of both arrays.
		 */
		if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH - 1))
			continue;

		fpsizes[depth+1] = populate_node(blob, offset, &mem,
						 nps[depth],
						 fpsizes[depth],
						 &nps[depth+1], dryrun);
		/* populate_node() returning 0 ends the walk early */
		if (!fpsizes[depth+1])
			return mem - base;

		/* Remember the first node created, for the caller and as root */
		if (!dryrun && nodepp && !*nodepp)
			*nodepp = nps[depth+1];
		if (!dryrun && !root)
			root = nps[depth+1];
	}

	if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
		pr_err("Error %d processing FDT\n", offset);
		return -EINVAL;
	}

	/*
	 * Reverse the child list. Some drivers assumes node order matches .dts
	 * node order
	 */
	if (!dryrun)
		reverse_nodes(root);

	return mem - base;
}

核心函数是populate_node

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*
 * Build one device_node for the node at offset in blob.
 *
 * Storage is taken from *mem through unflatten_dt_alloc(). In a dry run
 * nothing is written to the node. Returns 0 (with *pnp set to NULL) when
 * the node name cannot be read, otherwise the fpsize value for this node,
 * with the node pointer stored in *pnp.
 */
static unsigned int populate_node(const void *blob, // flat device tree blob
int offset, // offset of this node inside the blob
void **mem, // in/out allocation cursor handed to unflatten_dt_alloc()
struct device_node *dad, // parent node, may be NULL
unsigned int fpsize, // full-path size accumulated so far (see the comment in the body)
struct device_node **pnp, // receives the node produced by this call
bool dryrun) // true for the sizing pass: the node is not filled in
{
struct device_node *np;
const char *pathp;
unsigned int l, allocl;
int new_format = 0;

pathp = fdt_get_name(blob, offset, &l);
if (!pathp) {
*pnp = NULL;
return 0;
}

/* l now also counts the terminating zero */
allocl = ++l;

/* version 0x10 has a more compact unit name here instead of the full
* path. we accumulate the full path size using "fpsize", we'll rebuild
* it later. We detect this because the first character of the name is
* not '/'.
*/
if ((*pathp) != '/') {
new_format = 1;
if (fpsize == 0) {
/* root node: special case. fpsize accounts for path
* plus terminating zero. root node only has '/', so
* fpsize should be 2, but we want to avoid the first
* level nodes to have two '/' so we use fpsize 1 here
*/
fpsize = 1;
allocl = 2;
l = 1;
pathp = "";
} else {
/* account for '/' and path size minus terminal 0
* already in 'l'
*/
fpsize += l;
allocl = fpsize;
}
}

/* The full name is stored directly behind the struct, hence the extra allocl bytes */
np = unflatten_dt_alloc(mem, sizeof(struct device_node) + allocl,
__alignof__(struct device_node));
if (!dryrun) {
char *fn;
of_node_init(np);
np->full_name = fn = ((char *)np) + sizeof(*np);
if (new_format) {
/* rebuild full path for new format */
if (dad && dad->parent) {
strcpy(fn, dad->full_name);
#ifdef DEBUG
if ((strlen(fn) + l + 1) != allocl) {
pr_debug("%s: p: %d, l: %d, a: %d\n",
pathp, (int)strlen(fn),
l, allocl);
}
#endif
fn += strlen(fn);
}
*(fn++) = '/';
}
memcpy(fn, pathp, l);

/* Link the new node at the head of the parent's child list */
if (dad != NULL) {
np->parent = dad;
np->sibling = dad->child;
dad->child = np;
}
}

populate_properties(blob, offset, mem, np, pathp, dryrun);
if (!dryrun) {
np->name = of_get_property(np, "name", NULL);
np->type = of_get_property(np, "device_type", NULL);

/* Fall back to a placeholder string when a property is absent */
if (!np->name)
np->name = "<NULL>";
if (!np->type)
np->type = "<NULL>";
}

*pnp = np;
return fpsize;
}

到这里,dtb的所有二进制都被展开成了节点device_node,但是还没有进行扫描,也就是说,这些节点还没有变成能被内核识别的设备

下面这个就是关键函数of_platform_default_populate_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#ifndef CONFIG_PPC
/*
 * Registered through arch_initcall_sync() below.
 *
 * Returns -ENODEV when of_have_populated_dt() reports false. Otherwise it
 * handles a "ramoops" compatible node under /reserved-memory explicitly,
 * calls of_platform_default_populate() for everything else and returns 0.
 */
static int __init of_platform_default_populate_init(void)
{
struct device_node *node;

if (!of_have_populated_dt())
return -ENODEV;

/*
* Handle ramoops explicitly, since it is inside /reserved-memory,
* which lacks a "compatible" property.
*/
node = of_find_node_by_path("/reserved-memory");
if (node) {
node = of_find_compatible_node(node, NULL, "ramoops");
if (node)
of_platform_device_create(node, NULL, NULL);
}

/* Populate everything else. */
of_platform_default_populate(NULL, NULL, NULL);

return 0;
}
arch_initcall_sync(of_platform_default_populate_init);
#endif

可以看到,除了PPC(Power PC)架构,其他架构都需要调用这个函数,而且是通过arch_initcall_sync来默认执行,该initcall的级别是3s(即级别3的sync阶段),而驱动的级别大多都是6(device_initcall),也就是说,内核常常是先注册设备,然后注册驱动,在注册驱动的时候进行probe,与已经注册的设备进行匹配并将其激活


ARM64架构设备树初始化流程详解
https://yill-z.github.io/2025/01/06/ARM64架构设备树初始化流程详解/
作者
Yill Zhang
发布于
2025年1月6日
许可协议