]>
Commit | Line | Data |
---|---|---|
de42fe11 JL |
1 | ========================================== |
2 | ARM CPUs capacity bindings | |
3 | ========================================== | |
4 | ||
5 | ========================================== | |
6 | 1 - Introduction | |
7 | ========================================== | |
8 | ||
9 | ARM systems may be configured to have cpus with different power/performance | |
10 | characteristics within the same chip. In this case, additional information has | |
11 | to be made available to the kernel for it to be aware of such differences and | |
12 | take decisions accordingly. | |
13 | ||
14 | ========================================== | |
15 | 2 - CPU capacity definition | |
16 | ========================================== | |
17 | ||
18 | CPU capacity is a number that provides the scheduler information about CPUs | |
19 | heterogeneity. Such heterogeneity can come from micro-architectural differences | |
20 | (e.g., ARM big.LITTLE systems) or maximum frequency at which CPUs can run | |
21 | (e.g., SMP systems with multiple frequency domains). Heterogeneity in this | |
22 | context is about differing performance characteristics; this binding tries to | |
23 | capture a first-order approximation of the relative performance of CPUs. | |
24 | ||
25 | CPU capacities are obtained by running a suitable benchmark. This binding makes | |
26 | no guarantees on the validity or suitability of any particular benchmark; the | |
27 | final capacity should, however, be: | |
28 | ||
29 | * A "single-threaded" or CPU affine benchmark | |
30 | * Divided by the running frequency of the CPU executing the benchmark | |
31 | * Not subject to dynamic frequency scaling of the CPU | |
32 | ||
33 | For the time being, however, we advise usage of the Dhrystone benchmark. The | |
34 | above thus becomes: | |
35 | ||
36 | CPU capacities are obtained by running the Dhrystone benchmark on each CPU at | |
37 | max frequency (with caches enabled). The obtained DMIPS score is then divided | |
38 | by the frequency (in MHz) at which the benchmark has been run, so that | |
39 | DMIPS/MHz are obtained. Such values are then normalized w.r.t. the highest | |
40 | score obtained in the system. | |
41 | ||
42 | ========================================== | |
43 | 3 - capacity-dmips-mhz | |
44 | ========================================== | |
45 | ||
46 | capacity-dmips-mhz is an optional cpu node [1] property: u32 value | |
47 | representing CPU capacity expressed in normalized DMIPS/MHz. At boot time, the | |
48 | maximum frequency available to the cpu is then used to calculate the capacity | |
49 | value internally used by the kernel. | |
50 | ||
51 | capacity-dmips-mhz property is all-or-nothing: if it is specified for a cpu | |
52 | node, it has to be specified for every other cpu node, or the system will | |
53 | fall back to the default capacity value for every CPU. If cpufreq is not | |
54 | available, final capacities are calculated by directly using capacity-dmips- | |
55 | mhz values (normalized w.r.t. the highest value found while parsing the DT). | |
56 | ||
57 | =========================================== | |
58 | 4 - Examples | |
59 | =========================================== | |
60 | ||
61 | Example 1 (ARM 64-bit, 6-cpu system, two clusters): | |
204c881e VK |
62 | The capacity-dmips-mhz or DMIPS/MHz values (scaled to 1024) |
63 | are 1024 and 578 for cluster0 and cluster1. Further normalization | |
64 | is done by the operating system based on cluster0@max-freq=1100 and | |
65 | cluster1@max-freq=850, final capacities are 1024 for cluster0 and | |
66 | 446 for cluster1 (578*850/1100). | |
de42fe11 JL |
67 | |
68 | cpus { | |
69 | #address-cells = <2>; | |
70 | #size-cells = <0>; | |
71 | ||
72 | cpu-map { | |
73 | cluster0 { | |
74 | core0 { | |
75 | cpu = <&A57_0>; | |
76 | }; | |
77 | core1 { | |
78 | cpu = <&A57_1>; | |
79 | }; | |
80 | }; | |
81 | ||
82 | cluster1 { | |
83 | core0 { | |
84 | cpu = <&A53_0>; | |
85 | }; | |
86 | core1 { | |
87 | cpu = <&A53_1>; | |
88 | }; | |
89 | core2 { | |
90 | cpu = <&A53_2>; | |
91 | }; | |
92 | core3 { | |
93 | cpu = <&A53_3>; | |
94 | }; | |
95 | }; | |
96 | }; | |
97 | ||
98 | idle-states { | |
e9880240 | 99 | entry-method = "psci"; |
de42fe11 JL |
100 | |
101 | CPU_SLEEP_0: cpu-sleep-0 { | |
102 | compatible = "arm,idle-state"; | |
103 | arm,psci-suspend-param = <0x0010000>; | |
104 | local-timer-stop; | |
105 | entry-latency-us = <100>; | |
106 | exit-latency-us = <250>; | |
107 | min-residency-us = <150>; | |
108 | }; | |
109 | ||
110 | CLUSTER_SLEEP_0: cluster-sleep-0 { | |
111 | compatible = "arm,idle-state"; | |
112 | arm,psci-suspend-param = <0x1010000>; | |
113 | local-timer-stop; | |
114 | entry-latency-us = <800>; | |
115 | exit-latency-us = <700>; | |
116 | min-residency-us = <2500>; | |
117 | }; | |
118 | }; | |
119 | ||
120 | A57_0: cpu@0 { | |
121 | compatible = "arm,cortex-a57","arm,armv8"; | |
122 | reg = <0x0 0x0>; | |
123 | device_type = "cpu"; | |
124 | enable-method = "psci"; | |
125 | next-level-cache = <&A57_L2>; | |
126 | clocks = <&scpi_dvfs 0>; | |
127 | cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; | |
128 | capacity-dmips-mhz = <1024>; | |
129 | }; | |
130 | ||
131 | A57_1: cpu@1 { | |
132 | compatible = "arm,cortex-a57","arm,armv8"; | |
133 | reg = <0x0 0x1>; | |
134 | device_type = "cpu"; | |
135 | enable-method = "psci"; | |
136 | next-level-cache = <&A57_L2>; | |
137 | clocks = <&scpi_dvfs 0>; | |
138 | cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; | |
139 | capacity-dmips-mhz = <1024>; | |
140 | }; | |
141 | ||
142 | A53_0: cpu@100 { | |
143 | compatible = "arm,cortex-a53","arm,armv8"; | |
144 | reg = <0x0 0x100>; | |
145 | device_type = "cpu"; | |
146 | enable-method = "psci"; | |
147 | next-level-cache = <&A53_L2>; | |
148 | clocks = <&scpi_dvfs 1>; | |
149 | cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; | |
150 | capacity-dmips-mhz = <578>; | |
151 | }; | |
152 | ||
153 | A53_1: cpu@101 { | |
154 | compatible = "arm,cortex-a53","arm,armv8"; | |
155 | reg = <0x0 0x101>; | |
156 | device_type = "cpu"; | |
157 | enable-method = "psci"; | |
158 | next-level-cache = <&A53_L2>; | |
159 | clocks = <&scpi_dvfs 1>; | |
160 | cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; | |
161 | capacity-dmips-mhz = <578>; | |
162 | }; | |
163 | ||
164 | A53_2: cpu@102 { | |
165 | compatible = "arm,cortex-a53","arm,armv8"; | |
166 | reg = <0x0 0x102>; | |
167 | device_type = "cpu"; | |
168 | enable-method = "psci"; | |
169 | next-level-cache = <&A53_L2>; | |
170 | clocks = <&scpi_dvfs 1>; | |
171 | cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; | |
172 | capacity-dmips-mhz = <578>; | |
173 | }; | |
174 | ||
175 | A53_3: cpu@103 { | |
176 | compatible = "arm,cortex-a53","arm,armv8"; | |
177 | reg = <0x0 0x103>; | |
178 | device_type = "cpu"; | |
179 | enable-method = "psci"; | |
180 | next-level-cache = <&A53_L2>; | |
181 | clocks = <&scpi_dvfs 1>; | |
182 | cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; | |
183 | capacity-dmips-mhz = <578>; | |
184 | }; | |
185 | ||
186 | A57_L2: l2-cache0 { | |
187 | compatible = "cache"; | |
188 | }; | |
189 | ||
190 | A53_L2: l2-cache1 { | |
191 | compatible = "cache"; | |
192 | }; | |
193 | }; | |
194 | ||
195 | Example 2 (ARM 32-bit, 4-cpu system, two clusters, | |
196 | cpus 0,1@1GHz, cpus 2,3@500MHz): | |
197 | capacity-dmips-mhz values are scaled w.r.t. 2 (cpu@0 and cpu@1); this means that | |
198 | cpu@0 and cpu@1 are twice as fast as cpu@2 and cpu@3 (at the same frequency) | |
199 | ||
200 | cpus { | |
201 | #address-cells = <1>; | |
202 | #size-cells = <0>; | |
203 | ||
204 | cpu0: cpu@0 { | |
205 | device_type = "cpu"; | |
206 | compatible = "arm,cortex-a15"; | |
207 | reg = <0>; | |
208 | capacity-dmips-mhz = <2>; | |
209 | }; | |
210 | ||
211 | cpu1: cpu@1 { | |
212 | device_type = "cpu"; | |
213 | compatible = "arm,cortex-a15"; | |
214 | reg = <1>; | |
215 | capacity-dmips-mhz = <2>; | |
216 | }; | |
217 | ||
218 | cpu2: cpu@2 { | |
219 | device_type = "cpu"; | |
220 | compatible = "arm,cortex-a15"; | |
221 | reg = <0x100>; | |
222 | capacity-dmips-mhz = <1>; | |
223 | }; | |
224 | ||
225 | cpu3: cpu@3 { | |
226 | device_type = "cpu"; | |
227 | compatible = "arm,cortex-a15"; | |
228 | reg = <0x101>; | |
229 | capacity-dmips-mhz = <1>; | |
230 | }; | |
231 | }; | |
232 | ||
233 | =========================================== | |
234 | 5 - References | |
235 | =========================================== | |
236 | ||
237 | [1] ARM Linux Kernel documentation - CPUs bindings | |
8217724e | 238 | Documentation/devicetree/bindings/arm/cpus.yaml |