]>
Commit | Line | Data |
---|---|---|
69c1280b SM |
1 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
2 | // Copyright (c) 2018 Mellanox Technologies | |
3 | ||
4 | #include <linux/mlx5/driver.h> | |
2c891560 | 5 | |
69c1280b SM |
6 | #include "mlx5_core.h" |
7 | #include "lib/eq.h" | |
8 | #include "lib/mlx5.h" | |
9 | ||
2c891560 SM |
10 | struct mlx5_event_nb { |
11 | struct mlx5_nb nb; | |
12 | void *ctx; | |
13 | }; | |
14 | ||
/* Handlers for the general events of the low level mlx5_core driver.
 *
 * Events that belong to other major features (clock, eswitch, fpga,
 * FW trace and many others) are handled elsewhere: those mlx5 components
 * use their own separate notifier callbacks.
 */
static int any_notifier(struct notifier_block *nb, unsigned long type, void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data);
static int port_module(struct notifier_block *nb, unsigned long type, void *data);

/* handler which forwards the event to events->nh, the driver notifier chain */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data);
27 | ||
2c891560 | 28 | static struct mlx5_nb events_nbs_ref[] = { |
b8267cd7 | 29 | /* Events to be proccessed by mlx5_core */ |
2c891560 | 30 | {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, |
2c891560 SM |
31 | {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, |
32 | {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, | |
7a179555 SM |
33 | |
34 | /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ | |
35 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, | |
cb6191bf | 36 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, |
451be51c SM |
37 | /* QP/WQ resource events to forward */ |
38 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, | |
39 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, | |
40 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, | |
41 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, | |
42 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, | |
43 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, | |
44 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, | |
45 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, | |
46 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, | |
4e2df04a SM |
47 | /* SRQ events */ |
48 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, | |
49 | {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, | |
2c891560 SM |
50 | }; |
51 | ||
69c1280b | 52 | struct mlx5_events { |
69c1280b | 53 | struct mlx5_core_dev *dev; |
2c891560 | 54 | struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; |
20902be4 SM |
55 | /* driver notifier chain */ |
56 | struct atomic_notifier_head nh; | |
2c891560 | 57 | /* port module events stats */ |
69c1280b SM |
58 | struct mlx5_pme_stats pme_stats; |
59 | }; | |
60 | ||
61 | static const char *eqe_type_str(u8 type) | |
62 | { | |
63 | switch (type) { | |
64 | case MLX5_EVENT_TYPE_COMP: | |
65 | return "MLX5_EVENT_TYPE_COMP"; | |
66 | case MLX5_EVENT_TYPE_PATH_MIG: | |
67 | return "MLX5_EVENT_TYPE_PATH_MIG"; | |
68 | case MLX5_EVENT_TYPE_COMM_EST: | |
69 | return "MLX5_EVENT_TYPE_COMM_EST"; | |
70 | case MLX5_EVENT_TYPE_SQ_DRAINED: | |
71 | return "MLX5_EVENT_TYPE_SQ_DRAINED"; | |
72 | case MLX5_EVENT_TYPE_SRQ_LAST_WQE: | |
73 | return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; | |
74 | case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: | |
75 | return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; | |
76 | case MLX5_EVENT_TYPE_CQ_ERROR: | |
77 | return "MLX5_EVENT_TYPE_CQ_ERROR"; | |
78 | case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: | |
79 | return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; | |
80 | case MLX5_EVENT_TYPE_PATH_MIG_FAILED: | |
81 | return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; | |
82 | case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: | |
83 | return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; | |
84 | case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: | |
85 | return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; | |
86 | case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: | |
87 | return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; | |
88 | case MLX5_EVENT_TYPE_INTERNAL_ERROR: | |
89 | return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; | |
90 | case MLX5_EVENT_TYPE_PORT_CHANGE: | |
91 | return "MLX5_EVENT_TYPE_PORT_CHANGE"; | |
92 | case MLX5_EVENT_TYPE_GPIO_EVENT: | |
93 | return "MLX5_EVENT_TYPE_GPIO_EVENT"; | |
94 | case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: | |
95 | return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; | |
96 | case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: | |
97 | return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; | |
98 | case MLX5_EVENT_TYPE_REMOTE_CONFIG: | |
99 | return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; | |
100 | case MLX5_EVENT_TYPE_DB_BF_CONGESTION: | |
101 | return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; | |
102 | case MLX5_EVENT_TYPE_STALL_EVENT: | |
103 | return "MLX5_EVENT_TYPE_STALL_EVENT"; | |
104 | case MLX5_EVENT_TYPE_CMD: | |
105 | return "MLX5_EVENT_TYPE_CMD"; | |
106 | case MLX5_EVENT_TYPE_PAGE_REQUEST: | |
107 | return "MLX5_EVENT_TYPE_PAGE_REQUEST"; | |
108 | case MLX5_EVENT_TYPE_PAGE_FAULT: | |
109 | return "MLX5_EVENT_TYPE_PAGE_FAULT"; | |
110 | case MLX5_EVENT_TYPE_PPS_EVENT: | |
111 | return "MLX5_EVENT_TYPE_PPS_EVENT"; | |
112 | case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: | |
113 | return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; | |
114 | case MLX5_EVENT_TYPE_FPGA_ERROR: | |
115 | return "MLX5_EVENT_TYPE_FPGA_ERROR"; | |
116 | case MLX5_EVENT_TYPE_FPGA_QP_ERROR: | |
117 | return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; | |
118 | case MLX5_EVENT_TYPE_GENERAL_EVENT: | |
119 | return "MLX5_EVENT_TYPE_GENERAL_EVENT"; | |
fd4572b3 ED |
120 | case MLX5_EVENT_TYPE_MONITOR_COUNTER: |
121 | return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; | |
69c1280b SM |
122 | case MLX5_EVENT_TYPE_DEVICE_TRACER: |
123 | return "MLX5_EVENT_TYPE_DEVICE_TRACER"; | |
124 | default: | |
125 | return "Unrecognized event"; | |
126 | } | |
127 | } | |
128 | ||
2c891560 SM |
129 | /* handles all FW events, type == eqe->type */ |
130 | static int any_notifier(struct notifier_block *nb, | |
131 | unsigned long type, void *data) | |
132 | { | |
133 | struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); | |
134 | struct mlx5_events *events = event_nb->ctx; | |
135 | struct mlx5_eqe *eqe = data; | |
136 | ||
137 | mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", | |
138 | eqe_type_str(eqe->type), eqe->sub_type); | |
139 | return NOTIFY_OK; | |
140 | } | |
141 | ||
2c891560 SM |
142 | /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ |
143 | static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) | |
144 | { | |
145 | struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); | |
146 | struct mlx5_events *events = event_nb->ctx; | |
147 | struct mlx5_eqe *eqe = data; | |
69c1280b SM |
148 | u64 value_lsb; |
149 | u64 value_msb; | |
150 | ||
151 | value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); | |
152 | value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); | |
153 | ||
2c891560 | 154 | mlx5_core_warn(events->dev, |
69c1280b SM |
155 | "High temperature on sensors with bit set %llx %llx", |
156 | value_msb, value_lsb); | |
2c891560 SM |
157 | |
158 | return NOTIFY_OK; | |
69c1280b SM |
159 | } |
160 | ||
2c891560 | 161 | /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ |
c2fb3db2 MG |
162 | static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) |
163 | { | |
164 | switch (status) { | |
165 | case MLX5_MODULE_STATUS_PLUGGED: | |
166 | return "Cable plugged"; | |
167 | case MLX5_MODULE_STATUS_UNPLUGGED: | |
168 | return "Cable unplugged"; | |
169 | case MLX5_MODULE_STATUS_ERROR: | |
170 | return "Cable error"; | |
8d6b57e6 MG |
171 | case MLX5_MODULE_STATUS_DISABLED: |
172 | return "Cable disabled"; | |
c2fb3db2 MG |
173 | default: |
174 | return "Unknown status"; | |
175 | } | |
176 | } | |
69c1280b | 177 | |
c2fb3db2 MG |
178 | static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) |
179 | { | |
180 | switch (error) { | |
181 | case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: | |
182 | return "Power budget exceeded"; | |
183 | case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: | |
184 | return "Long Range for non MLNX cable"; | |
185 | case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: | |
186 | return "Bus stuck (I2C or data shorted)"; | |
187 | case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: | |
188 | return "No EEPROM/retry timeout"; | |
189 | case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: | |
190 | return "Enforce part number list"; | |
191 | case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: | |
192 | return "Unknown identifier"; | |
193 | case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: | |
194 | return "High Temperature"; | |
195 | case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: | |
196 | return "Bad or shorted cable/module"; | |
37a12aae MG |
197 | case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: |
198 | return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; | |
c2fb3db2 MG |
199 | default: |
200 | return "Unknown error"; | |
201 | } | |
202 | } | |
69c1280b | 203 | |
2c891560 SM |
204 | /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ |
205 | static int port_module(struct notifier_block *nb, unsigned long type, void *data) | |
69c1280b | 206 | { |
2c891560 SM |
207 | struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); |
208 | struct mlx5_events *events = event_nb->ctx; | |
209 | struct mlx5_eqe *eqe = data; | |
210 | ||
69c1280b SM |
211 | enum port_module_event_status_type module_status; |
212 | enum port_module_event_error_type error_type; | |
213 | struct mlx5_eqe_port_module *module_event_eqe; | |
5400261e | 214 | const char *status_str; |
69c1280b SM |
215 | u8 module_num; |
216 | ||
217 | module_event_eqe = &eqe->data.port_module; | |
69c1280b SM |
218 | module_status = module_event_eqe->module_status & |
219 | PORT_MODULE_EVENT_MODULE_STATUS_MASK; | |
220 | error_type = module_event_eqe->error_type & | |
221 | PORT_MODULE_EVENT_ERROR_TYPE_MASK; | |
c2fb3db2 MG |
222 | |
223 | if (module_status < MLX5_MODULE_STATUS_NUM) | |
224 | events->pme_stats.status_counters[module_status]++; | |
c2fb3db2 | 225 | |
5400261e | 226 | if (module_status == MLX5_MODULE_STATUS_ERROR) |
c2fb3db2 MG |
227 | if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) |
228 | events->pme_stats.error_counters[error_type]++; | |
69c1280b SM |
229 | |
230 | if (!printk_ratelimit()) | |
2c891560 | 231 | return NOTIFY_OK; |
69c1280b | 232 | |
5400261e TT |
233 | module_num = module_event_eqe->module; |
234 | status_str = mlx5_pme_status_to_string(module_status); | |
235 | if (module_status == MLX5_MODULE_STATUS_ERROR) { | |
236 | const char *error_str = mlx5_pme_error_to_string(error_type); | |
237 | ||
c2fb3db2 MG |
238 | mlx5_core_err(events->dev, |
239 | "Port module event[error]: module %u, %s, %s\n", | |
240 | module_num, status_str, error_str); | |
5400261e | 241 | } else { |
2c891560 | 242 | mlx5_core_info(events->dev, |
69c1280b | 243 | "Port module event: module %u, %s\n", |
c2fb3db2 | 244 | module_num, status_str); |
5400261e | 245 | } |
2c891560 SM |
246 | |
247 | return NOTIFY_OK; | |
69c1280b SM |
248 | } |
249 | ||
250 | void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) | |
251 | { | |
252 | *stats = dev->priv.events->pme_stats; | |
253 | } | |
254 | ||
7a179555 SM |
255 | /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ |
256 | static int forward_event(struct notifier_block *nb, unsigned long event, void *data) | |
257 | { | |
258 | struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); | |
259 | struct mlx5_events *events = event_nb->ctx; | |
93631211 | 260 | struct mlx5_eqe *eqe = data; |
7a179555 | 261 | |
93631211 SM |
262 | mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", |
263 | eqe_type_str(eqe->type), eqe->sub_type); | |
7a179555 SM |
264 | atomic_notifier_call_chain(&events->nh, event, data); |
265 | return NOTIFY_OK; | |
266 | } | |
267 | ||
69c1280b SM |
268 | int mlx5_events_init(struct mlx5_core_dev *dev) |
269 | { | |
270 | struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); | |
271 | ||
272 | if (!events) | |
273 | return -ENOMEM; | |
274 | ||
20902be4 | 275 | ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); |
69c1280b SM |
276 | events->dev = dev; |
277 | dev->priv.events = events; | |
278 | return 0; | |
279 | } | |
280 | ||
281 | void mlx5_events_cleanup(struct mlx5_core_dev *dev) | |
282 | { | |
283 | kvfree(dev->priv.events); | |
284 | } | |
285 | ||
286 | void mlx5_events_start(struct mlx5_core_dev *dev) | |
287 | { | |
288 | struct mlx5_events *events = dev->priv.events; | |
2c891560 | 289 | int i; |
69c1280b | 290 | |
2c891560 SM |
291 | for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { |
292 | events->notifiers[i].nb = events_nbs_ref[i]; | |
293 | events->notifiers[i].ctx = events; | |
294 | mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); | |
295 | } | |
69c1280b SM |
296 | } |
297 | ||
298 | void mlx5_events_stop(struct mlx5_core_dev *dev) | |
299 | { | |
300 | struct mlx5_events *events = dev->priv.events; | |
2c891560 | 301 | int i; |
69c1280b | 302 | |
2c891560 SM |
303 | for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) |
304 | mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); | |
69c1280b | 305 | } |
20902be4 SM |
306 | |
307 | int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) | |
308 | { | |
309 | struct mlx5_events *events = dev->priv.events; | |
310 | ||
311 | return atomic_notifier_chain_register(&events->nh, nb); | |
312 | } | |
313 | EXPORT_SYMBOL(mlx5_notifier_register); | |
314 | ||
315 | int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) | |
316 | { | |
317 | struct mlx5_events *events = dev->priv.events; | |
318 | ||
319 | return atomic_notifier_chain_unregister(&events->nh, nb); | |
320 | } | |
321 | EXPORT_SYMBOL(mlx5_notifier_unregister); | |
322 | ||
323 | int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) | |
324 | { | |
325 | return atomic_notifier_call_chain(&events->nh, event, data); | |
326 | } |