From 79c0a2b7abc906c7cf3c793256c6b638d7dc477f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 27 Sep 2025 15:26:34 +0300 Subject: [PATCH 001/867] EDAC/versalnet: Fix off by one in handle_error() The priv->mci[] array has NUM_CONTROLLERS so this > comparison needs to be >= to prevent an out of bounds access. Fixes: d5fe2fec6c40 ("EDAC: Add a driver for the AMD Versal NET DDR controller") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam --- drivers/edac/versalnet_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index 7c5db8bf0595b..1ded4c3f02133 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -433,7 +433,7 @@ static void handle_error(struct mc_priv *priv, struct ecc_status *stat, phys_addr_t pfn; int err; - if (WARN_ON_ONCE(ctl_num > NUM_CONTROLLERS)) + if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS)) return; mci = priv->mci[ctl_num]; From 3dacc900c00bad7275ee8c096dbcaab699d83e36 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Mon, 13 Oct 2025 01:43:19 +0000 Subject: [PATCH 002/867] hdlc_ppp: fix potential null pointer in ppp_cp_event logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/wan/hdlc_ppp.c: In function ‘ppp_cp_event’: drivers/net/wan/hdlc_ppp.c:353:17: warning: ‘%s’ directive argument is null [-Wformat-overflow=] 353 | netdev_info(dev, "%s down\n", proto_name(pid)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/wan/hdlc_ppp.c:342:17: warning: ‘%s’ directive argument is null [-Wformat-overflow=] 342 | netdev_info(dev, "%s up\n", proto_name(pid)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Update proto_name() to return "LCP" by default instead of NULL. This change silences the compiler without changing existing behavior and removes the need for the local 'pname' variable in ppp_cp_event. 
Suggested-by: Krzysztof Hałasa Signed-off-by: Kriish Sharma Acked-by: Krzysztof Hałasa Link: https://patch.msgid.link/20251013014319.1608706-1-kriish.sharma2006@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/wan/hdlc_ppp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 7496a2e9a2820..159295c4bd6d8 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -126,14 +126,12 @@ static inline struct proto *get_proto(struct net_device *dev, u16 pid) static inline const char *proto_name(u16 pid) { switch (pid) { - case PID_LCP: - return "LCP"; case PID_IPCP: return "IPCP"; case PID_IPV6CP: return "IPV6CP"; default: - return NULL; + return "LCP"; } } From 53615ad26e9789bfcdf3a4dccbcecb15294ea024 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 13 Oct 2025 13:41:33 +0900 Subject: [PATCH 003/867] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc() Now that we have struct netmem_desc, it's better to access the pp fields via struct netmem_desc rather than struct net_iov. Introduce netmem_to_nmdesc() for safely converting netmem_ref to netmem_desc regardless of the type underneath, i.e. netmem_desc, net_iov. While at it, remove __netmem_clear_lsb() and use netmem_to_nmdesc() instead. 
Suggested-by: Pavel Begunkov Signed-off-by: Byungchul Park Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20251013044133.69472-1-byungchul@sk.com Signed-off-by: Paolo Abeni --- include/net/netmem.h | 66 +++++++++++++++++++++--------------------- net/core/netmem_priv.h | 16 +++++----- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index f7dacc9e75fd1..651e2c62d1dde 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -247,6 +247,23 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem) return page_to_pfn(netmem_to_page(netmem)); } +/* XXX: How to extract netmem_desc from page must be changed, once + * netmem_desc no longer overlays on page and will be allocated through + * slab. + */ +#define __pp_page_to_nmdesc(p) (_Generic((p), \ + const struct page * : (const struct netmem_desc *)(p), \ + struct page * : (struct netmem_desc *)(p))) + +/* CAUTION: Check if the page is a pp page before calling this helper or + * know it's a pp page. + */ +#define pp_page_to_nmdesc(p) \ +({ \ + DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ + __pp_page_to_nmdesc(p); \ +}) + /** * __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing * @netmem @@ -265,42 +282,25 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem) return (__force struct netmem_desc *)netmem; } -/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to - * common fields. - * @netmem: netmem reference to extract as net_iov. - * - * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic, - * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access - * these fields without a type check to make sure that the underlying mem is - * net_iov or page. +/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for + * access to common fields. + * @netmem: netmem reference to get netmem_desc. 
* - * The resulting value of this function can only be used to access the fields - * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in - * undefined behavior. + * All the sub types of netmem_ref (netmem_desc, net_iov) have the same + * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc. * - * Return: the netmem_ref cast to net_iov* regardless of its underlying type. + * Return: the pointer to struct netmem_desc * regardless of its + * underlying type. */ -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem) { - return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); -} + void *p = (void *)((__force unsigned long)netmem & ~NET_IOV); -/* XXX: How to extract netmem_desc from page must be changed, once - * netmem_desc no longer overlays on page and will be allocated through - * slab. - */ -#define __pp_page_to_nmdesc(p) (_Generic((p), \ - const struct page * : (const struct netmem_desc *)(p), \ - struct page * : (struct netmem_desc *)(p))) + if (netmem_is_net_iov(netmem)) + return &((struct net_iov *)p)->desc; -/* CAUTION: Check if the page is a pp page before calling this helper or - * know it's a pp page. 
- */ -#define pp_page_to_nmdesc(p) \ -({ \ - DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ - __pp_page_to_nmdesc(p); \ -}) + return __pp_page_to_nmdesc((struct page *)p); +} /** * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem @@ -320,12 +320,12 @@ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem) static inline struct page_pool *netmem_get_pp(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->pp; + return netmem_to_nmdesc(netmem)->pp; } static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem) { - return &__netmem_clear_lsb(netmem)->pp_ref_count; + return &netmem_to_nmdesc(netmem)->pp_ref_count; } static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid) @@ -390,7 +390,7 @@ static inline bool netmem_is_pfmemalloc(netmem_ref netmem) static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->dma_addr; + return netmem_to_nmdesc(netmem)->dma_addr; } void get_netmem(netmem_ref netmem); diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h index cd95394399b40..23175cb2bd866 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -5,19 +5,19 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; + return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) { - __netmem_clear_lsb(netmem)->pp_magic |= pp_magic; + netmem_to_nmdesc(netmem)->pp_magic |= pp_magic; } static inline void netmem_clear_pp_magic(netmem_ref netmem) { - WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK); + WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK); - __netmem_clear_lsb(netmem)->pp_magic = 0; + netmem_to_nmdesc(netmem)->pp_magic = 0; } static inline bool netmem_is_pp(netmem_ref netmem) @@ -27,13 +27,13 @@ static inline bool 
netmem_is_pp(netmem_ref netmem) static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) { - __netmem_clear_lsb(netmem)->pp = pool; + netmem_to_nmdesc(netmem)->pp = pool; } static inline void netmem_set_dma_addr(netmem_ref netmem, unsigned long dma_addr) { - __netmem_clear_lsb(netmem)->dma_addr = dma_addr; + netmem_to_nmdesc(netmem)->dma_addr = dma_addr; } static inline unsigned long netmem_get_dma_index(netmem_ref netmem) @@ -43,7 +43,7 @@ static inline unsigned long netmem_get_dma_index(netmem_ref netmem) if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return 0; - magic = __netmem_clear_lsb(netmem)->pp_magic; + magic = netmem_to_nmdesc(netmem)->pp_magic; return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT; } @@ -57,6 +57,6 @@ static inline void netmem_set_dma_index(netmem_ref netmem, return; magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT); - __netmem_clear_lsb(netmem)->pp_magic = magic; + netmem_to_nmdesc(netmem)->pp_magic = magic; } #endif From 16a2206354d169bfd13552ad577e07ce66e439ab Mon Sep 17 00:00:00 2001 From: Jan Vaclav Date: Thu, 9 Oct 2025 23:09:08 +0200 Subject: [PATCH 004/867] net/hsr: add protocol version to fill_info output Currently, it is possible to configure IFLA_HSR_VERSION, but there is no way to check in userspace what the currently configured HSR protocol version is. Add it to the output of hsr_fill_info(), when the interface is using the HSR protocol. Let's not expose it when using the PRP protocol, since it only has one version and it's not possible to set it from userspace. This info could then be used by e.g. ip(8), like so: $ ip -d link show hsr0 12: hsr0: mtu ... ... hsr slave1 veth0 slave2 veth1 ... 
proto 0 version 1 Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Jan Vaclav Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251009210903.1055187-6-jvaclav@redhat.com Signed-off-by: Paolo Abeni --- net/hsr/hsr_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b120470246cc5..4461adf696234 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -166,6 +166,8 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; if (hsr->prot_version == PRP_V1) proto = HSR_PROTOCOL_PRP; + else if (nla_put_u8(skb, IFLA_HSR_VERSION, hsr->prot_version)) + goto nla_put_failure; if (nla_put_u8(skb, IFLA_HSR_PROTOCOL, proto)) goto nla_put_failure; From 512f0b7ebbc79d97d9485cd055902d439237e91f Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Tue, 14 Oct 2025 15:19:33 +0200 Subject: [PATCH 005/867] media: cx18: Fix invalid access to file * Since commit 7b9eb53e8591 ("media: cx18: Access v4l2_fh from file") all ioctl handlers have been ported to operate on the file * first function argument. The cx18 DVB layer calls cx18_init_on_first_open() when the driver needs to start streaming. This function calls the s_input(), s_std() and s_frequency() ioctl handlers directly, but being called from the driver context, it doesn't have a valid file * to pass them. This causes the ioctl handlers to dereference an invalid pointer. Fix this by moving the implementation of those ioctls to functions that take a cx18 pointer instead of a file pointer, and turn the V4L2 ioctl handlers into wrappers that get the cx18 from the file. When calling from cx18_init_on_first_open(), pass the cx18 pointer directly. This allows removing the fake fh in cx18_init_on_first_open(). The bug has been reported by Smatch: --> 1223 cx18_s_input(NULL, &fh, video_input); The patch adds a new dereference of "file" but some of the callers pass a NULL pointer. 
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aKL4OMWsESUdX8KQ@stanley.mountain/ Fixes: 7b9eb53e8591 ("media: cx18: Access v4l2_fh from file") Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Tested-by: Hans Verkuil Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- drivers/media/pci/cx18/cx18-driver.c | 9 +++------ drivers/media/pci/cx18/cx18-ioctl.c | 30 ++++++++++++++++++---------- drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++--- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index b62fd12c93c1a..74c59a94b2b05 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -1136,11 +1136,8 @@ int cx18_init_on_first_open(struct cx18 *cx) int video_input; int fw_retry_count = 3; struct v4l2_frequency vf; - struct cx18_open_id fh; v4l2_std_id std; - fh.cx = cx; - if (test_bit(CX18_F_I_FAILED, &cx->i_flags)) return -ENXIO; @@ -1220,14 +1217,14 @@ int cx18_init_on_first_open(struct cx18 *cx) video_input = cx->active_input; cx->active_input++; /* Force update of input */ - cx18_s_input(NULL, &fh, video_input); + cx18_do_s_input(cx, video_input); /* Let the VIDIOC_S_STD ioctl do all the work, keeps the code in one place. */ cx->std++; /* Force full standard initialization */ std = (cx->tuner_std == V4L2_STD_ALL) ? 
V4L2_STD_NTSC_M : cx->tuner_std; - cx18_s_std(NULL, &fh, std); - cx18_s_frequency(NULL, &fh, &vf); + cx18_do_s_std(cx, std); + cx18_do_s_frequency(cx, &vf); return 0; } diff --git a/drivers/media/pci/cx18/cx18-ioctl.c b/drivers/media/pci/cx18/cx18-ioctl.c index 0f3019739d03d..0d676a57e24ec 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.c +++ b/drivers/media/pci/cx18/cx18-ioctl.c @@ -521,10 +521,8 @@ static int cx18_g_input(struct file *file, void *fh, unsigned int *i) return 0; } -int cx18_s_input(struct file *file, void *fh, unsigned int inp) +int cx18_do_s_input(struct cx18 *cx, unsigned int inp) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; v4l2_std_id std = V4L2_STD_ALL; const struct cx18_card_video_input *card_input = cx->card->video_inputs + inp; @@ -558,6 +556,11 @@ int cx18_s_input(struct file *file, void *fh, unsigned int inp) return 0; } +static int cx18_s_input(struct file *file, void *fh, unsigned int inp) +{ + return cx18_do_s_input(file2id(file)->cx, inp); +} + static int cx18_g_frequency(struct file *file, void *fh, struct v4l2_frequency *vf) { @@ -570,11 +573,8 @@ static int cx18_g_frequency(struct file *file, void *fh, return 0; } -int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) +int cx18_do_s_frequency(struct cx18 *cx, const struct v4l2_frequency *vf) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; - if (vf->tuner != 0) return -EINVAL; @@ -585,6 +585,12 @@ int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *v return 0; } +static int cx18_s_frequency(struct file *file, void *fh, + const struct v4l2_frequency *vf) +{ + return cx18_do_s_frequency(file2id(file)->cx, vf); +} + static int cx18_g_std(struct file *file, void *fh, v4l2_std_id *std) { struct cx18 *cx = file2id(file)->cx; @@ -593,11 +599,8 @@ static int cx18_g_std(struct file *file, void *fh, v4l2_std_id *std) return 0; } -int cx18_s_std(struct file *file, void *fh, v4l2_std_id 
std) +int cx18_do_s_std(struct cx18 *cx, v4l2_std_id std) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; - if ((std & V4L2_STD_ALL) == 0) return -EINVAL; @@ -642,6 +645,11 @@ int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) return 0; } +static int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) +{ + return cx18_do_s_std(file2id(file)->cx, std); +} + static int cx18_s_tuner(struct file *file, void *fh, const struct v4l2_tuner *vt) { struct cx18_open_id *id = file2id(file); diff --git a/drivers/media/pci/cx18/cx18-ioctl.h b/drivers/media/pci/cx18/cx18-ioctl.h index 97cd9f99e22d9..42a8acd697354 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.h +++ b/drivers/media/pci/cx18/cx18-ioctl.h @@ -12,6 +12,8 @@ u16 cx18_service2vbi(int type); void cx18_expand_service_set(struct v4l2_sliced_vbi_format *fmt, int is_pal); u16 cx18_get_service_set(struct v4l2_sliced_vbi_format *fmt); void cx18_set_funcs(struct video_device *vdev); -int cx18_s_std(struct file *file, void *fh, v4l2_std_id std); -int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf); -int cx18_s_input(struct file *file, void *fh, unsigned int inp); + +struct cx18; +int cx18_do_s_std(struct cx18 *cx, v4l2_std_id std); +int cx18_do_s_frequency(struct cx18 *cx, const struct v4l2_frequency *vf); +int cx18_do_s_input(struct cx18 *cx, unsigned int inp); From c90fad3e4157f943b6d5842d3ceb20b32e566986 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Tue, 14 Oct 2025 15:19:34 +0200 Subject: [PATCH 006/867] media: ivtv: Fix invalid access to file * Since commit 9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file") all ioctl handlers have been ported to operate on the file * first function argument. The ivtv DVB layer calls ivtv_init_on_first_open() when the driver needs to start streaming. This function calls the s_input() and s_frequency() ioctl handlers directly, but being called from the driver context, it doesn't have a valid file * to pass them. 
This causes the ioctl handlers to dereference an invalid pointer. Fix this by moving the implementation of those ioctls to two helper functions. The ivtv_do_s_input() helper accepts a struct ivtv * as first argument, which is easily accessible in ivtv_init_on_first_open() as well as from the file * argument of the ioctl handler. The ivtv_do_s_frequency() helper takes an ivtv_stream * instead. The stream * can safely be accessed in ivtv_init_on_first_open() where it is hard-coded to the IVTV_ENC_STREAM_TYPE_MPG stream type, as well as from the ioctl handler as a valid stream type is associated to each open file handle depending on which video device node has been opened in the ivtv_open() file operation. The bug has been reported by Smatch. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aKL4OMWsESUdX8KQ@stanley.mountain/ Fixes: 9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file") Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Tested-by: Hans Verkuil Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- drivers/media/pci/ivtv/ivtv-driver.c | 11 ++++------- drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++----- drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++-- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index 72a8f76a41f45..459eb6cc370cc 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -1247,15 +1247,12 @@ static int ivtv_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) int ivtv_init_on_first_open(struct ivtv *itv) { - struct v4l2_frequency vf; /* Needed to call ioctls later */ - struct ivtv_open_id fh; + struct ivtv_stream *s = &itv->streams[IVTV_ENC_STREAM_TYPE_MPG]; + struct v4l2_frequency vf; int fw_retry_count = 3; int video_input; - fh.itv = itv; - fh.type = IVTV_ENC_STREAM_TYPE_MPG; - if (test_bit(IVTV_F_I_FAILED, &itv->i_flags)) return -ENXIO; @@ -1297,13 +1294,13 @@ int 
ivtv_init_on_first_open(struct ivtv *itv) video_input = itv->active_input; itv->active_input++; /* Force update of input */ - ivtv_s_input(NULL, &fh, video_input); + ivtv_do_s_input(itv, video_input); /* Let the VIDIOC_S_STD ioctl do all the work, keeps the code in one place. */ itv->std++; /* Force full standard initialization */ itv->std_out = itv->std; - ivtv_s_frequency(NULL, &fh, &vf); + ivtv_do_s_frequency(s, &vf); if (itv->card->v4l2_capabilities & V4L2_CAP_VIDEO_OUTPUT) { /* Turn on the TV-out: ivtv_init_mpeg_decoder() initializes diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index 84c73bd22f2da..8d5ea3aec06f3 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -974,9 +974,8 @@ static int ivtv_g_input(struct file *file, void *fh, unsigned int *i) return 0; } -int ivtv_s_input(struct file *file, void *fh, unsigned int inp) +int ivtv_do_s_input(struct ivtv *itv, unsigned int inp) { - struct ivtv *itv = file2id(file)->itv; v4l2_std_id std; int i; @@ -1017,6 +1016,11 @@ int ivtv_s_input(struct file *file, void *fh, unsigned int inp) return 0; } +static int ivtv_s_input(struct file *file, void *fh, unsigned int inp) +{ + return ivtv_do_s_input(file2id(file)->itv, inp); +} + static int ivtv_g_output(struct file *file, void *fh, unsigned int *i) { struct ivtv *itv = file2id(file)->itv; @@ -1065,10 +1069,9 @@ static int ivtv_g_frequency(struct file *file, void *fh, struct v4l2_frequency * return 0; } -int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) +int ivtv_do_s_frequency(struct ivtv_stream *s, const struct v4l2_frequency *vf) { - struct ivtv *itv = file2id(file)->itv; - struct ivtv_stream *s = &itv->streams[file2id(file)->type]; + struct ivtv *itv = s->itv; if (s->vdev.vfl_dir) return -ENOTTY; @@ -1082,6 +1085,15 @@ int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *v return 0; } +static int ivtv_s_frequency(struct file 
*file, void *fh, + const struct v4l2_frequency *vf) +{ + struct ivtv_open_id *id = file2id(file); + struct ivtv *itv = id->itv; + + return ivtv_do_s_frequency(&itv->streams[id->type], vf); +} + static int ivtv_g_std(struct file *file, void *fh, v4l2_std_id *std) { struct ivtv *itv = file2id(file)->itv; diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.h b/drivers/media/pci/ivtv/ivtv-ioctl.h index 7f8c6f43d397f..96ca7e2ef973f 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.h +++ b/drivers/media/pci/ivtv/ivtv-ioctl.h @@ -9,6 +9,8 @@ #ifndef IVTV_IOCTL_H #define IVTV_IOCTL_H +struct ivtv; + u16 ivtv_service2vbi(int type); void ivtv_expand_service_set(struct v4l2_sliced_vbi_format *fmt, int is_pal); u16 ivtv_get_service_set(struct v4l2_sliced_vbi_format *fmt); @@ -17,7 +19,7 @@ int ivtv_set_speed(struct ivtv *itv, int speed); void ivtv_set_funcs(struct video_device *vdev); void ivtv_s_std_enc(struct ivtv *itv, v4l2_std_id std); void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std); -int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf); -int ivtv_s_input(struct file *file, void *fh, unsigned int inp); +int ivtv_do_s_frequency(struct ivtv_stream *s, const struct v4l2_frequency *vf); +int ivtv_do_s_input(struct ivtv *itv, unsigned int inp); #endif From 0513a3f97b96aaf41e91e02bc3b5d08c9dce5bfa Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 13 Oct 2025 03:01:16 -0700 Subject: [PATCH 007/867] net: bridge: correct debug message function name in br_fill_ifinfo The debug message in br_fill_ifinfo() incorrectly refers to br_fill_info instead of the actual function name. Update it for clarity in debugging output. 
Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251013100121.755899-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4e2d53b272210..0264730938f4b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -467,7 +467,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, else br = netdev_priv(dev); - br_debug(br, "br_fill_info event %d port %s master %s\n", + br_debug(br, "br_fill_ifinfo event %d port %s master %s\n", event, dev->name, br->dev->name); nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); From e0aa115271394d68992e4a0369479e3632038c2a Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 13 Oct 2025 09:05:02 -0700 Subject: [PATCH 008/867] eth: fbnic: fix various typos in comments and strings Fix several minor typos and grammatical errors in comments and log (in fbnic firmware, PCI, and time modules) Changes include: - "cordeump" -> "coredump" - "of" -> "off" in RPC config comment - "healty" -> "healthy" in firmware heartbeat comment - "Firmware crashed detected!" -> "Firmware crash detected!" 
- "The could be caused" -> "This could be caused" - "lockng" -> "locking" in fbnic_time.c Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013160507.768820-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 6 +++--- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 6 +++--- drivers/net/ethernet/meta/fbnic/fbnic_time.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index c87cb9ed09e7e..1166fa17438d9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -878,11 +878,11 @@ fbnic_fw_parse_coredump_info_resp(void *opaque, struct fbnic_tlv_msg **results) * @fbd: FBNIC device structure * @cmpl_data: Completion struct to store coredump * @offset: Offset into coredump requested - * @length: Length of section of cordeump to fetch + * @length: Length of section of coredump to fetch * * Return: zero on success, negative errno on failure * - * Asks the firmware to provide a section of the cordeump back in a message. + * Asks the firmware to provide a section of the coredump back in a message. * The response will have an offset and size matching the values provided. 
*/ int fbnic_fw_xmit_coredump_read_msg(struct fbnic_dev *fbd, @@ -1868,7 +1868,7 @@ int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd) if (err) goto free_message; - /* Send message of to FW notifying it of current RPC config */ + /* Send message off to FW notifying it of current RPC config */ err = fbnic_mbx_map_tlv_msg(fbd, msg); if (err) goto free_message; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index a7a6b4db8016f..4620f1847f2e0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -185,7 +185,7 @@ static void fbnic_health_check(struct fbnic_dev *fbd) { struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - /* As long as the heart is beating the FW is healty */ + /* As long as the heart is beating the FW is healthy */ if (fbd->fw_heartbeat_enabled) return; @@ -196,7 +196,7 @@ static void fbnic_health_check(struct fbnic_dev *fbd) if (tx_mbx->head != tx_mbx->tail) return; - fbnic_devlink_fw_report(fbd, "Firmware crashed detected!"); + fbnic_devlink_fw_report(fbd, "Firmware crash detected!"); fbnic_devlink_otp_check(fbd, "error detected after firmware recovery"); if (fbnic_fw_config_after_crash(fbd)) @@ -378,7 +378,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * @pdev: PCI device information struct * * Called by the PCI subsystem to alert the driver that it should release - * a PCI device. The could be caused by a Hot-Plug event, or because the + * a PCI device. This could be caused by a Hot-Plug event, or because the * driver is going to be removed from memory. 
**/ static void fbnic_remove(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_time.c b/drivers/net/ethernet/meta/fbnic/fbnic_time.c index 39d99677b71ea..db7748189f457 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_time.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_time.c @@ -253,7 +253,7 @@ static void fbnic_ptp_reset(struct fbnic_dev *fbd) void fbnic_time_init(struct fbnic_net *fbn) { - /* This is not really a statistic, but the lockng primitive fits + /* This is not really a statistic, but the locking primitive fits * our usecase perfectly, we need an atomic 8 bytes READ_ONCE() / * WRITE_ONCE() behavior. */ From bdec4271e8081fed339759c45f1db81ea7f8f8ed Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 13 Oct 2025 17:28:34 +0200 Subject: [PATCH 009/867] net: dsa: b53: implement port isolation support Implement port isolation support via the Protected Ports register. Protected ports can only communicate with unprotected ports, but not with each other, matching the expected behaviour of isolated ports. Tested on BCM963268BU. 
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251013152834.100169-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 25 ++++++++++++++++++++++++- drivers/net/dsa/b53/b53_regs.h | 4 ++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 2f846381d5a76..ad4990da9f7cf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -632,6 +632,25 @@ static void b53_port_set_learning(struct b53_device *dev, int port, b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg); } +static void b53_port_set_isolated(struct b53_device *dev, int port, + bool isolated) +{ + u8 offset; + u16 reg; + + if (is5325(dev)) + offset = B53_PROTECTED_PORT_SEL_25; + else + offset = B53_PROTECTED_PORT_SEL; + + b53_read16(dev, B53_CTRL_PAGE, offset, &reg); + if (isolated) + reg |= BIT(port); + else + reg &= ~BIT(port); + b53_write16(dev, B53_CTRL_PAGE, offset, reg); +} + static void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) { struct b53_device *dev = ds->priv; @@ -652,6 +671,7 @@ int b53_setup_port(struct dsa_switch *ds, int port) b53_port_set_ucast_flood(dev, port, true); b53_port_set_mcast_flood(dev, port, true); b53_port_set_learning(dev, port, false); + b53_port_set_isolated(dev, port, false); /* Force all traffic to go to the CPU port to prevent the ASIC from * trying to forward to bridged ports on matching FDB entries, then @@ -2318,7 +2338,7 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; - unsigned long mask = (BR_FLOOD | BR_MCAST_FLOOD); + unsigned long mask = (BR_FLOOD | BR_MCAST_FLOOD | BR_ISOLATED); if (!is5325(dev)) mask |= BR_LEARNING; @@ -2343,6 +2363,9 @@ int b53_br_flags(struct dsa_switch *ds, int port, if (flags.mask & BR_LEARNING) 
b53_port_set_learning(ds->priv, port, !!(flags.val & BR_LEARNING)); + if (flags.mask & BR_ISOLATED) + b53_port_set_isolated(ds->priv, port, + !!(flags.val & BR_ISOLATED)); return 0; } diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 309fe0e46dadf..c16b3e3e82278 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -120,6 +120,10 @@ #define B53_SWITCH_CTRL 0x22 #define B53_MII_DUMB_FWDG_EN BIT(6) +/* Protected Port Selection (16 bit) */ +#define B53_PROTECTED_PORT_SEL 0x24 +#define B53_PROTECTED_PORT_SEL_25 0x26 + /* (16 bit) */ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 From 10c4b4f60f5d0dbd29fa819be76e888501c7b729 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 13 Oct 2025 22:50:27 +0200 Subject: [PATCH 010/867] net: mdio: use macro module_driver to avoid boilerplate code Use macro module_driver to avoid boilerplate code. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/e5c37417-4984-4b57-8154-264deef61e0d@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index c640ba44dd6ee..42d6d47e445b7 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -689,16 +689,7 @@ struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); * init/exit. Each module may only use this macro once, and calling it * replaces module_init() and module_exit(). 
*/ -#define mdio_module_driver(_mdio_driver) \ -static int __init mdio_module_init(void) \ -{ \ - return mdio_driver_register(&_mdio_driver); \ -} \ -module_init(mdio_module_init); \ -static void __exit mdio_module_exit(void) \ -{ \ - mdio_driver_unregister(&_mdio_driver); \ -} \ -module_exit(mdio_module_exit) +#define mdio_module_driver(_mdio_driver) \ + module_driver(_mdio_driver, mdio_driver_register, mdio_driver_unregister) #endif /* __LINUX_MDIO_H__ */ From c3527eeb65cfe6fb93f9e06bc0429616a3a23592 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Mon, 13 Oct 2025 20:36:32 +0200 Subject: [PATCH 011/867] eth: fealnx: fix typo in comments There are a few typos in comments: - replace "avilable" with "available" - replace "mutlicast" with "multicast" Signed-off-by: Denis Benato Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013183632.1226627-1-benato.denis96@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/fealnx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 6ac8547ef9b8d..3c9961806f756 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -196,7 +196,7 @@ enum intr_status_bits { ERI = 0x00000080, /* receive early int */ CNTOVF = 0x00000040, /* counter overflow */ RBU = 0x00000020, /* receive buffer unavailable */ - TBU = 0x00000010, /* transmit buffer unavilable */ + TBU = 0x00000010, /* transmit buffer unavailable */ TI = 0x00000008, /* transmit interrupt */ RI = 0x00000004, /* receive interrupt */ RxErr = 0x00000002, /* receive error */ @@ -215,7 +215,7 @@ enum rx_mode_bits { CR_W_RXMODEMASK = 0x000000e0, CR_W_PROM = 0x00000080, /* promiscuous mode */ CR_W_AB = 0x00000040, /* accept broadcast */ - CR_W_AM = 0x00000020, /* accept mutlicast */ + CR_W_AM = 0x00000020, /* accept multicast */ CR_W_ARP = 0x00000008, /* receive runt pkt */ CR_W_ALP = 0x00000004, /* receive long pkt */ CR_W_SEP = 0x00000002, /* receive 
error pkt */ From 331f8a8bea22aecf99437f3561453a85f40026de Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 13 Oct 2025 16:29:41 +0200 Subject: [PATCH 012/867] net: airoha: Add missing stats to ethtool_eth_mac_stats Add the following stats to ethtool ethtool_eth_mac_stats stats: - FramesTransmittedOK - OctetsTransmittedOK - FramesReceivedOK - OctetsReceivedOK Signed-off-by: Lorenzo Bianconi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251013-airoha-ethtool-improvements-v1-1-fdd1c6fc9be1@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 833dd911980b3..2fe1f39558b80 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2022,8 +2022,12 @@ static void airoha_ethtool_get_mac_stats(struct net_device *dev, airoha_update_hw_stats(port); do { start = u64_stats_fetch_begin(&port->stats.syncp); + stats->FramesTransmittedOK = port->stats.tx_ok_pkts; + stats->OctetsTransmittedOK = port->stats.tx_ok_bytes; stats->MulticastFramesXmittedOK = port->stats.tx_multicast; stats->BroadcastFramesXmittedOK = port->stats.tx_broadcast; + stats->FramesReceivedOK = port->stats.rx_ok_pkts; + stats->OctetsReceivedOK = port->stats.rx_ok_bytes; stats->BroadcastFramesReceivedOK = port->stats.rx_broadcast; } while (u64_stats_fetch_retry(&port->stats.syncp, start)); } From fc4fed9054ef5b5269d4395dd9db36fe98fce9e3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 13 Oct 2025 16:29:42 +0200 Subject: [PATCH 013/867] net: airoha: Add get_link ethtool callback Set get_link ethtool callback to ethtool_op_get_link routine. 
Signed-off-by: Lorenzo Bianconi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251013-airoha-ethtool-improvements-v1-2-fdd1c6fc9be1@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 2fe1f39558b80..6effdda64380b 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2770,6 +2770,7 @@ static const struct ethtool_ops airoha_ethtool_ops = { .get_drvinfo = airoha_ethtool_get_drvinfo, .get_eth_mac_stats = airoha_ethtool_get_mac_stats, .get_rmon_stats = airoha_ethtool_get_rmon_stats, + .get_link = ethtool_op_get_link, }; static int airoha_metadata_dst_alloc(struct airoha_gdm_port *port) From 6378e25ee1ca2ed687eee78eff7bd588d52a4e14 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 10 Oct 2025 14:34:17 -0400 Subject: [PATCH 014/867] dt-bindings: net: dsa: nxp,sja1105: Add optional clock Add optional clock for OSC_IN and fix the below CHECK_DTBS warnings: arch/arm/boot/dts/nxp/imx/imx6qp-prtwd3.dtb: switch@0 (nxp,sja1105q): Unevaluated properties are not allowed ('clocks' was unexpected) Signed-off-by: Frank Li Acked-by: Conor Dooley Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251010183418.2179063-1-Frank.Li@nxp.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index e9dd914b0734c..607b7fe8d28ee 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -41,6 +41,9 @@ properties: therefore discouraged. 
maxItems: 1 + clocks: + maxItems: 1 + spi-cpha: true spi-cpol: true From 1c51450f1afff1e7419797720df3fbd9ccbf610c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Oct 2025 14:59:26 +0000 Subject: [PATCH 015/867] tcp: better handle TCP_TX_DELAY on established flows Some applications use TCP_TX_DELAY socket option after TCP flow is established. Some metrics need to be updated, otherwise TCP might take time to adapt to the new (emulated) RTT. This patch adjusts tp->srtt_us, tp->rtt_min, icsk_rto and sk->sk_pacing_rate. This is best effort, and for instance icsk_rto is reset without taking backoff into account. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20251013145926.833198-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 ++ net/ipv4/tcp.c | 31 +++++++++++++++++++++++++++---- net/ipv4/tcp_input.c | 4 ++-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ca230ed526ae..1e547138f4fb7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -461,6 +461,8 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); +void tcp_update_pacing_rate(struct sock *sk); +void tcp_set_rto(struct sock *sk); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a18aeca7ab07..4d720aa09a4c6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3583,9 +3583,12 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); EXPORT_IPV6_MOD(tcp_tx_delay_enabled); -static void tcp_enable_tx_delay(void) +static void tcp_enable_tx_delay(struct sock *sk, int val) { - if (!static_branch_unlikely(&tcp_tx_delay_enabled)) { +
struct tcp_sock *tp = tcp_sk(sk); + s32 delta = (val - tp->tcp_tx_delay) << 3; + + if (val && !static_branch_unlikely(&tcp_tx_delay_enabled)) { static int __tcp_tx_delay_enabled = 0; if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) { @@ -3593,6 +3596,22 @@ static void tcp_enable_tx_delay(void) pr_info("TCP_TX_DELAY enabled\n"); } } + /* If we change tcp_tx_delay on a live flow, adjust tp->srtt_us, + * tp->rtt_min, icsk_rto and sk->sk_pacing_rate. + * This is best effort. + */ + if (delta && sk->sk_state == TCP_ESTABLISHED) { + s64 srtt = (s64)tp->srtt_us + delta; + + tp->srtt_us = clamp_t(s64, srtt, 1, ~0U); + + /* Note: does not deal with non zero icsk_backoff */ + tcp_set_rto(sk); + + minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); + + tcp_update_pacing_rate(sk); + } } /* When set indicates to always queue non-full frames. Later the user clears @@ -4119,8 +4138,12 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, tp->recvmsg_inq = val; break; case TCP_TX_DELAY: - if (val) - tcp_enable_tx_delay(); + /* tp->srtt_us is u32, and is shifted by 3 */ + if (val < 0 || val >= (1U << (31 - 3))) { + err = -EINVAL; + break; + } + tcp_enable_tx_delay(sk, val); WRITE_ONCE(tp->tcp_tx_delay, val); break; default: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 31ea5af49f2dc..8fc97f4d8a6b2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1095,7 +1095,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->srtt_us = max(1U, srtt); } -static void tcp_update_pacing_rate(struct sock *sk) +void tcp_update_pacing_rate(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); u64 rate; @@ -1132,7 +1132,7 @@ static void tcp_update_pacing_rate(struct sock *sk) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static void tcp_set_rto(struct sock *sk) +void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 
8) From 6ddb811a579f87b8506344020002d396f814f7c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Oct 2025 15:22:31 +0000 Subject: [PATCH 016/867] net: add SK_WMEM_ALLOC_BIAS constant sk->sk_wmem_alloc is initialized to 1, and sk_wmem_alloc_get() takes care of this initial value. Add SK_WMEM_ALLOC_BIAS define to not spread this magic value. Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251013152234.842065-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 3 ++- net/atm/common.c | 2 +- net/core/sock.c | 5 ++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 60bcb13f045c3..2794bc5c56542 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2303,6 +2303,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro return 0; } +#define SK_WMEM_ALLOC_BIAS 1 /** * sk_wmem_alloc_get - returns write allocations * @sk: socket @@ -2311,7 +2312,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - SK_WMEM_ALLOC_BIAS; } /** diff --git a/net/atm/common.c b/net/atm/common.c index 881c7f259dbd4..cecc71a8bee11 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -157,7 +157,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ - refcount_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, SK_WMEM_ALLOC_BIAS); atomic_set(&sk->sk_rmem_alloc, 0); vcc->push = NULL; vcc->pop = NULL; diff --git a/net/core/sock.c b/net/core/sock.c index dc03d4b5909a2..542cfa16ee125 100644 --- a/net/core/sock.c +++ 
b/net/core/sock.c @@ -2313,7 +2313,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } sock_net_set(sk, net); - refcount_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, SK_WMEM_ALLOC_BIAS); mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); @@ -2494,8 +2494,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_rmem_alloc, 0); - /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ - refcount_set(&newsk->sk_wmem_alloc, 1); + refcount_set(&newsk->sk_wmem_alloc, SK_WMEM_ALLOC_BIAS); atomic_set(&newsk->sk_omem_alloc, 0); sk_init_common(newsk); From d365c9bca35cdeb534aac279c81d1fc9730bb100 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Oct 2025 15:22:32 +0000 Subject: [PATCH 017/867] net: control skb->ooo_okay from skb_set_owner_w() 15 years after Tom Herbert added skb->ooo_okay, only TCP transport benefits from it. We can support other transports directly from skb_set_owner_w(). If no other TX packet for this socket is in a host queue (qdisc, NIC queue) there is no risk of self-inflicted reordering, we can set skb->ooo_okay. This allows netdev_pick_tx() to choose a TX queue based on XPS settings, instead of reusing the queue chosen at the time the first packet was sent for connected sockets. Tested: 500 concurrent UDP_RR connected UDP flows, host with 32 TX queues, 512 cpus, XPS setup. super_netperf 500 -t UDP_RR -H -l 1000 -- -r 100,100 -Nn & This patch saves between 10% and 20% of cycles, depending on how process scheduler migrates threads among cpus. Using following bpftrace script, we can see the effect on Qdisc/NIC tx queues being better used (less cache line misses). 
bpftrace -e ' k:__dev_queue_xmit { @start[cpu] = nsecs; } kr:__dev_queue_xmit { if (@start[cpu]) { $delay = nsecs - @start[cpu]; delete(@start[cpu]); @__dev_queue_xmit_ns = hist($delay); } } END { clear(@start); }' Before: @__dev_queue_xmit_ns: [128, 256) 6 | | [256, 512) 116283 | | [512, 1K) 1888205 |@@@@@@@@@@@ | [1K, 2K) 8106167 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [2K, 4K) 8699293 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [4K, 8K) 2600676 |@@@@@@@@@@@@@@@ | [8K, 16K) 721688 |@@@@ | [16K, 32K) 122995 | | [32K, 64K) 10639 | | [64K, 128K) 119 | | [128K, 256K) 1 | | After: @__dev_queue_xmit_ns: [128, 256) 3 | | [256, 512) 651112 |@@ | [512, 1K) 8109938 |@@@@@@@@@@@@@@@@@@@@@@@@@@ | [1K, 2K) 16081031 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [2K, 4K) 2411692 |@@@@@@@ | [4K, 8K) 98994 | | [8K, 16K) 1536 | | [16K, 32K) 587 | | [32K, 64K) 2 | | Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251013152234.842065-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 542cfa16ee125..08ae20069b6d2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2694,6 +2694,8 @@ void __sock_wfree(struct sk_buff *skb) void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { + int old_wmem; + skb_orphan(skb); #ifdef CONFIG_INET if (unlikely(!sk_fullsock(sk))) @@ -2707,7 +2709,15 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) * is enough to guarantee sk_free() won't free this sock until * all in-flight packets are completed */ - refcount_add(skb->truesize, &sk->sk_wmem_alloc); + __refcount_add(skb->truesize, &sk->sk_wmem_alloc, &old_wmem); + + /* (old_wmem == SK_WMEM_ALLOC_BIAS) if no other TX packet for this socket + * is in a host queue (qdisc, NIC queue). 
+ * Set skb->ooo_okay so that netdev_pick_tx() can choose a TX queue + * based on XPS for better performance. + * Otherwise clear ooo_okay to not risk Out Of Order delivery. + */ + skb->ooo_okay = (old_wmem == SK_WMEM_ALLOC_BIAS); } EXPORT_SYMBOL(skb_set_owner_w); From 2ddef3462b3a5d62e5485e22ce128a5c02276438 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Oct 2025 15:22:33 +0000 Subject: [PATCH 018/867] net: add /proc/sys/net/core/txq_reselection_ms control Add a new sysctl to control how often a queue reselection can happen even if a flow has a persistent queue of skbs in a Qdisc or NIC queue. A value of zero means the feature is disabled. Default is 1000 (1 second). This sysctl is used in the following patch. Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251013152234.842065-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/admin-guide/sysctl/net.rst | 17 +++++++++++++++++ include/net/netns/core.h | 1 + net/core/net_namespace.c | 1 + net/core/sysctl_net_core.c | 7 +++++++ 4 files changed, 26 insertions(+) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 2ef50828aff16..40749b3cd3569 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -406,6 +406,23 @@ to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt). If set to 1 (default), hash rethink is performed on listening socket. If set to 0, hash rethink is not performed. +txq_reselection_ms +------------------ + +Controls how often (in ms) a busy connected flow can select another tx queue. + +A reselection is desirable when/if user thread has migrated and XPS +would select a different queue. Same can occur without XPS +if the flow hash has changed. + +But switching txq can introduce reorders, especially if the +old queue is under high pressure.
Modern TCP stacks deal +well with reorders if they happen not too often. + +To disable this feature, set the value to 0. + +Default : 1000 + gro_normal_batch ---------------- diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 9b36f0ff0c200..cb9c3e4cd7385 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -13,6 +13,7 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + int sysctl_txq_reselection; int sysctl_optmem_max; u8 sysctl_txrehash; u8 sysctl_tstamp_allow_data; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b0e0f22d7b213..adcfef55a66f1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -395,6 +395,7 @@ static __net_init void preinit_net_sysctl(struct net *net) net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; net->core.sysctl_tstamp_allow_data = 1; + net->core.sysctl_txq_reselection = msecs_to_jiffies(1000); } /* init code that must occur even if setup_net() is not called. 
*/ diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8cf04b57ade1e..f79137826d7f9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -667,6 +667,13 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, + { + .procname = "txq_reselection_ms", + .data = &init_net.core.sysctl_txq_reselection, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { .procname = "tstamp_allow_data", .data = &init_net.core.sysctl_tstamp_allow_data, From 4a7708443dec13b074bc43855f494358fedbd3c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Oct 2025 15:22:34 +0000 Subject: [PATCH 019/867] net: allow busy connected flows to switch tx queues This is a followup of commit 726e9e8b94b9 ("tcp: refine skb->ooo_okay setting") and of prior commit in this series ("net: control skb->ooo_okay from skb_set_owner_w()") skb->ooo_okay might never be set for bulk flows that always have at least one skb in a qdisc queue or NIC queue, especially if TX completion is delayed because of a stressed cpu. The so-called "strange attractors" have caused many performance issues (see for instance 9b462d02d6dd ("tcp: TCP Small Queues and strange attractors")), we need to do better. We have tried very hard to avoid reorders because TCP was not dealing with them nicely a decade ago. Use the new net.core.txq_reselection_ms sysctl to let flows follow XPS and select a more efficient queue. After this patch, we no longer have to make sure threads are pinned to cpus, they now can be migrated without adding too much spinlock/qdisc/TX completion pressure anymore. TX completion part was problematic, because it added false sharing on various socket fields, but also added false sharing and spinlock contention in mm layers. Calling skb_orphan() from ndo_start_xmit() is not an option unfortunately.
Note for later: 1) move sk->sk_tx_queue_mapping closer to sk_tx_queue_mapping_jiffies for better cache locality. 2) Study if 9b462d02d6dd ("tcp: TCP Small Queues and strange attractors") could be revised. Tested: Used a host with 32 TX queues, shared by groups of 8 cores. XPS setup : echo ff >/sys/class/net/eth1/queue/tx-0/xps_cpus echo ff00 >/sys/class/net/eth1/queue/tx-1/xps_cpus echo ff0000 >/sys/class/net/eth1/queue/tx-2/xps_cpus echo ff000000 >/sys/class/net/eth1/queue/tx-3/xps_cpus echo ff,00000000 >/sys/class/net/eth1/queue/tx-4/xps_cpus echo ff00,00000000 >/sys/class/net/eth1/queue/tx-5/xps_cpus echo ff0000,00000000 >/sys/class/net/eth1/queue/tx-6/xps_cpus echo ff000000,00000000 >/sys/class/net/eth1/queue/tx-7/xps_cpus ... Launched a tcp_stream with 15 threads and 1000 flows, initially affined to core 0-15 taskset -c 0-15 tcp_stream -T15 -F1000 -l1000 -c -H target_host Checked that only queues 0 and 1 are used as instructed by XPS : tc -s qdisc show dev eth1|grep backlog|grep -v "backlog 0b 0p" backlog 123489410b 1890p backlog 69809026b 1064p backlog 52401054b 805p Then force each thread to run on cpu 1,9,17,25,33,41,49,57,65,73,81,89,97,105,113,121 C=1;PID=`pidof tcp_stream`;for P in `ls /proc/$PID/task`; do taskset -pc $C $P; C=$(($C + 8));done Set txq_reselection_ms to 1000 echo 1000 > /proc/sys/net/core/txq_reselection_ms Check that the flows have migrated nicely: tc -s qdisc show dev eth1|grep backlog|grep -v "backlog 0b 0p" backlog 130508314b 1916p backlog 8584380b 126p backlog 8584380b 126p backlog 8379990b 123p backlog 8584380b 126p backlog 8487484b 125p backlog 8584380b 126p backlog 8448120b 124p backlog 8584380b 126p backlog 8720640b 128p backlog 8856900b 130p backlog 8584380b 126p backlog 8652510b 127p backlog 8448120b 124p backlog 8516250b 125p backlog 7834950b 115p Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251013152234.842065-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- 
include/net/sock.h | 26 ++++++++++++-------------- net/core/dev.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 2794bc5c56542..f0d00928db9e9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -313,6 +313,7 @@ struct sk_filter; * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests + * @sk_tx_queue_mapping_jiffies: time in jiffies of last @sk_tx_queue_mapping refresh. * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. @@ -485,6 +486,7 @@ struct sock { unsigned long sk_pacing_rate; /* bytes per second */ atomic_t sk_zckey; atomic_t sk_tskey; + unsigned long sk_tx_queue_mapping_jiffies; __cacheline_group_end(sock_write_tx); __cacheline_group_begin(sock_read_tx); @@ -1992,7 +1994,15 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) /* Paired with READ_ONCE() in sk_tx_queue_get() and * other WRITE_ONCE() because socket lock might be not held. */ - WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); + if (READ_ONCE(sk->sk_tx_queue_mapping) != tx_queue) { + WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); + WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies); + return; + } + + /* Refresh sk_tx_queue_mapping_jiffies if too old. */ + if (time_is_before_jiffies(READ_ONCE(sk->sk_tx_queue_mapping_jiffies) + HZ)) + WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies); } #define NO_QUEUE_MAPPING USHRT_MAX @@ -2005,19 +2015,7 @@ static inline void sk_tx_queue_clear(struct sock *sk) WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } -static inline int sk_tx_queue_get(const struct sock *sk) -{ - if (sk) { - /* Paired with WRITE_ONCE() in sk_tx_queue_clear() - * and sk_tx_queue_set(). 
- */ - int val = READ_ONCE(sk->sk_tx_queue_mapping); - - if (val != NO_QUEUE_MAPPING) - return val; - } - return -1; -} +int sk_tx_queue_get(const struct sock *sk); static inline void __sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index a64cef2c537e9..33e6101dbc454 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4591,6 +4591,32 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, } EXPORT_SYMBOL(dev_pick_tx_zero); +int sk_tx_queue_get(const struct sock *sk) +{ + int resel, val; + + if (!sk) + return -1; + /* Paired with WRITE_ONCE() in sk_tx_queue_clear() + * and sk_tx_queue_set(). + */ + val = READ_ONCE(sk->sk_tx_queue_mapping); + + if (val == NO_QUEUE_MAPPING) + return -1; + + if (!sk_fullsock(sk)) + return val; + + resel = READ_ONCE(sock_net(sk)->core.sysctl_txq_reselection); + if (resel && time_is_before_jiffies( + READ_ONCE(sk->sk_tx_queue_mapping_jiffies) + resel)) + return -1; + + return val; +} +EXPORT_SYMBOL(sk_tx_queue_get); + u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { @@ -4606,8 +4632,7 @@ u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, if (new_index < 0) new_index = skb_tx_hash(dev, sb_dev, skb); - if (queue_index != new_index && sk && - sk_fullsock(sk) && + if (sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); From a8e846b8d93de748485653dd8a6a8efd8f5d7613 Mon Sep 17 00:00:00 2001 From: Abhishek Rawal Date: Tue, 14 Oct 2025 11:22:33 +0530 Subject: [PATCH 020/867] r8152: Advertise software timestamp information. Driver calls skb_tx_timestamp(skb) in rtl8152_start_xmit(), but does not advertise the capability in ethtool. Advertise software timestamp capabilities on struct ethtool_ops. 
Signed-off-by: Abhishek Rawal Reviewed-by: Jamie Bainbridge Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251014055234.46527-1-rawal.abhishek92@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 44cba7acfe7d9..f896e9f28c3b0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9311,6 +9311,7 @@ static const struct ethtool_ops ops = { .set_ringparam = rtl8152_set_ringparam, .get_pauseparam = rtl8152_get_pauseparam, .set_pauseparam = rtl8152_set_pauseparam, + .get_ts_info = ethtool_op_get_ts_info, }; static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) From 378e6523ebb1e80b3955b7675cfe40b07028d085 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 14 Oct 2025 08:02:47 +0200 Subject: [PATCH 021/867] net: bcmgenet: remove unused platform code This effectively reverts b0ba512e25d7 ("net: bcmgenet: enable driver to work without a device tree"). There has never been an in-tree user of struct bcmgenet_platform_data, all devices use OF or ACPI. 
Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/108b4e64-55d4-4b4e-9a11-3c810c319d66@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - .../net/ethernet/broadcom/genet/bcmgenet.c | 20 ++--- drivers/net/ethernet/broadcom/genet/bcmmii.c | 75 +------------------ include/linux/platform_data/bcmgenet.h | 19 ----- 4 files changed, 7 insertions(+), 108 deletions(-) delete mode 100644 include/linux/platform_data/bcmgenet.h diff --git a/MAINTAINERS b/MAINTAINERS index 3a27901781c2b..4c4b519171f37 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5123,7 +5123,6 @@ F: Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml F: drivers/net/ethernet/broadcom/genet/ F: drivers/net/ethernet/broadcom/unimac.h F: drivers/net/mdio/mdio-bcm-unimac.c -F: include/linux/platform_data/bcmgenet.h F: include/linux/platform_data/mdio-bcm-unimac.h BROADCOM IPROC ARM ARCHITECTURE diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 98971ae4f87df..d99ef92feb82b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -35,7 +35,6 @@ #include #include #include -#include #include @@ -3926,7 +3925,6 @@ MODULE_DEVICE_TABLE(of, bcmgenet_match); static int bcmgenet_probe(struct platform_device *pdev) { - struct bcmgenet_platform_data *pd = pdev->dev.platform_data; const struct bcmgenet_plat_data *pdata; struct bcmgenet_priv *priv; struct net_device *dev; @@ -4010,9 +4008,6 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->version = pdata->version; priv->dma_max_burst_length = pdata->dma_max_burst_length; priv->flags = pdata->flags; - } else { - priv->version = pd->genet_version; - priv->dma_max_burst_length = DMA_MAX_BURST_LENGTH; } priv->clk = devm_clk_get_optional(&priv->pdev->dev, "enet"); @@ -4062,16 +4057,13 @@ static int bcmgenet_probe(struct platform_device *pdev) if 
(device_get_phy_mode(&pdev->dev) == PHY_INTERFACE_MODE_INTERNAL) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - if (pd && !IS_ERR_OR_NULL(pd->mac_address)) - eth_hw_addr_set(dev, pd->mac_address); - else - if (device_get_ethdev_address(&pdev->dev, dev)) - if (has_acpi_companion(&pdev->dev)) { - u8 addr[ETH_ALEN]; + if (device_get_ethdev_address(&pdev->dev, dev)) + if (has_acpi_companion(&pdev->dev)) { + u8 addr[ETH_ALEN]; - bcmgenet_get_hw_addr(priv, addr); - eth_hw_addr_set(dev, addr); - } + bcmgenet_get_hw_addr(priv, addr); + eth_hw_addr_set(dev, addr); + } if (!is_valid_ether_addr(dev->dev_addr)) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 573e8b279e52f..38f854b94a799 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "bcmgenet.h" @@ -436,23 +435,6 @@ static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv) return priv->mdio_dn; } -static void bcmgenet_mii_pdata_init(struct bcmgenet_priv *priv, - struct unimac_mdio_pdata *ppd) -{ - struct device *kdev = &priv->pdev->dev; - struct bcmgenet_platform_data *pd = kdev->platform_data; - - if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) { - /* - * Internal or external PHY with MDIO access - */ - if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) - ppd->phy_mask = 1 << pd->phy_address; - else - ppd->phy_mask = 0; - } -} - static int bcmgenet_mii_wait(void *wait_func_data) { struct bcmgenet_priv *priv = wait_func_data; @@ -467,7 +449,6 @@ static int bcmgenet_mii_wait(void *wait_func_data) static int bcmgenet_mii_register(struct bcmgenet_priv *priv) { struct platform_device *pdev = priv->pdev; - struct bcmgenet_platform_data *pdata = pdev->dev.platform_data; struct device_node *dn = pdev->dev.of_node; struct 
unimac_mdio_pdata ppd; struct platform_device *ppdev; @@ -511,8 +492,6 @@ static int bcmgenet_mii_register(struct bcmgenet_priv *priv) ppdev->dev.parent = &pdev->dev; if (dn) ppdev->dev.of_node = bcmgenet_mii_of_find_mdio(priv); - else if (pdata) - bcmgenet_mii_pdata_init(priv, &ppd); else ppd.phy_mask = ~0; @@ -594,58 +573,6 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv) return 0; } -static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) -{ - struct device *kdev = &priv->pdev->dev; - struct bcmgenet_platform_data *pd = kdev->platform_data; - char phy_name[MII_BUS_ID_SIZE + 3]; - char mdio_bus_id[MII_BUS_ID_SIZE]; - struct phy_device *phydev; - - snprintf(mdio_bus_id, MII_BUS_ID_SIZE, "%s-%d", - UNIMAC_MDIO_DRV_NAME, priv->pdev->id); - - if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) { - snprintf(phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, - mdio_bus_id, pd->phy_address); - - /* - * Internal or external PHY with MDIO access - */ - phydev = phy_attach(priv->dev, phy_name, pd->phy_interface); - if (IS_ERR(phydev)) { - dev_err(kdev, "failed to register PHY device\n"); - return PTR_ERR(phydev); - } - } else { - /* - * MoCA port or no MDIO access. - * Use fixed PHY to represent the link layer. 
- */ - struct fixed_phy_status fphy_status = { - .link = 1, - .speed = pd->phy_speed, - .duplex = pd->phy_duplex, - .pause = 0, - .asym_pause = 0, - }; - - phydev = fixed_phy_register(&fphy_status, NULL); - if (IS_ERR(phydev)) { - dev_err(kdev, "failed to register fixed PHY device\n"); - return PTR_ERR(phydev); - } - - /* Make sure we initialize MoCA PHYs with a link down */ - phydev->link = 0; - - } - - priv->phy_interface = pd->phy_interface; - - return 0; -} - static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; @@ -656,7 +583,7 @@ static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv) else if (has_acpi_companion(kdev)) return bcmgenet_phy_interface_init(priv); else - return bcmgenet_mii_pd_init(priv); + return -EINVAL; } int bcmgenet_mii_init(struct net_device *dev) diff --git a/include/linux/platform_data/bcmgenet.h b/include/linux/platform_data/bcmgenet.h deleted file mode 100644 index d8f8738629d20..0000000000000 --- a/include/linux/platform_data/bcmgenet.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_PLATFORM_DATA_BCMGENET_H__ -#define __LINUX_PLATFORM_DATA_BCMGENET_H__ - -#include -#include -#include - -struct bcmgenet_platform_data { - bool mdio_enabled; - phy_interface_t phy_interface; - int phy_address; - int phy_speed; - int phy_duplex; - u8 mac_address[ETH_ALEN]; - int genet_version; -}; - -#endif From 4077d7fb27be990a8ddcff9b49f7e1788a960f3a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 11 Oct 2025 01:10:38 +0100 Subject: [PATCH 022/867] wifi: wcn36xx: Remove unused wcn36xx_smd_update_scan_params wcn36xx_smd_update_scan_params() last use was removed in 2020 by commit 5973a2947430 ("wcn36xx: Fix software-driven scan") Remove it. This leaves the wcn36xx_hal_update_scan_params_req_ex and wcn36xx_hal_update_scan_params_resp structs unused. Remove them, together with the unused wcn36xx_hal_update_scan_params_req. Signed-off-by: Dr. 
David Alan Gilbert Acked-by: Loic Poulain Link: https://patch.msgid.link/20251011001038.352393-1-linux@treblig.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/wcn36xx/hal.h | 74 -------------------------- drivers/net/wireless/ath/wcn36xx/smd.c | 60 --------------------- drivers/net/wireless/ath/wcn36xx/smd.h | 1 - 3 files changed, 135 deletions(-) diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h index d3a9d00e65e13..ef9ea4ff891b3 100644 --- a/drivers/net/wireless/ath/wcn36xx/hal.h +++ b/drivers/net/wireless/ath/wcn36xx/hal.h @@ -4484,80 +4484,6 @@ struct set_rssi_filter_resp { u32 status; }; -/* Update scan params - sent from host to PNO to be used during PNO - * scanningx */ -struct wcn36xx_hal_update_scan_params_req { - - struct wcn36xx_hal_msg_header header; - - /* Host setting for 11d */ - u8 dot11d_enabled; - - /* Lets PNO know that host has determined the regulatory domain */ - u8 dot11d_resolved; - - /* Channels on which PNO is allowed to scan */ - u8 channel_count; - u8 channels[WCN36XX_HAL_PNO_MAX_NETW_CHANNELS]; - - /* Minimum channel time */ - u16 active_min_ch_time; - - /* Maximum channel time */ - u16 active_max_ch_time; - - /* Minimum channel time */ - u16 passive_min_ch_time; - - /* Maximum channel time */ - u16 passive_max_ch_time; - - /* Cb State */ - enum phy_chan_bond_state state; -} __packed; - -/* Update scan params - sent from host to PNO to be used during PNO - * scanningx */ -struct wcn36xx_hal_update_scan_params_req_ex { - - struct wcn36xx_hal_msg_header header; - - /* Host setting for 11d */ - u8 dot11d_enabled; - - /* Lets PNO know that host has determined the regulatory domain */ - u8 dot11d_resolved; - - /* Channels on which PNO is allowed to scan */ - u8 channel_count; - u8 channels[WCN36XX_HAL_PNO_MAX_NETW_CHANNELS_EX]; - - /* Minimum channel time */ - u16 active_min_ch_time; - - /* Maximum channel time */ - u16 active_max_ch_time; - - /* Minimum channel time */ - u16 
passive_min_ch_time; - - /* Maximum channel time */ - u16 passive_max_ch_time; - - /* Cb State */ - enum phy_chan_bond_state state; -} __packed; - -/* Update scan params - sent from host to PNO to be used during PNO - * scanningx */ -struct wcn36xx_hal_update_scan_params_resp { - - struct wcn36xx_hal_msg_header header; - - /* status of the request */ - u32 status; -} __packed; - struct wcn36xx_hal_set_tx_per_tracking_req_msg { struct wcn36xx_hal_msg_header header; diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index 2cf86fc3f8fe0..136acc414714c 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -1127,66 +1127,6 @@ int wcn36xx_smd_process_ptt_msg(struct wcn36xx *wcn, return ret; } -static int wcn36xx_smd_update_scan_params_rsp(void *buf, size_t len) -{ - struct wcn36xx_hal_update_scan_params_resp *rsp; - - rsp = buf; - - /* Remove the PNO version bit */ - rsp->status &= (~(WCN36XX_FW_MSG_PNO_VERSION_MASK)); - - if (WCN36XX_FW_MSG_RESULT_SUCCESS != rsp->status) { - wcn36xx_warn("error response from update scan\n"); - return rsp->status; - } - - return 0; -} - -int wcn36xx_smd_update_scan_params(struct wcn36xx *wcn, - u8 *channels, size_t channel_count) -{ - struct wcn36xx_hal_update_scan_params_req_ex msg_body; - int ret; - - mutex_lock(&wcn->hal_mutex); - INIT_HAL_MSG(msg_body, WCN36XX_HAL_UPDATE_SCAN_PARAM_REQ); - - msg_body.dot11d_enabled = false; - msg_body.dot11d_resolved = true; - - msg_body.channel_count = channel_count; - memcpy(msg_body.channels, channels, channel_count); - msg_body.active_min_ch_time = 60; - msg_body.active_max_ch_time = 120; - msg_body.passive_min_ch_time = 60; - msg_body.passive_max_ch_time = 110; - msg_body.state = PHY_SINGLE_CHANNEL_CENTERED; - - PREPARE_HAL_BUF(wcn->hal_buf, msg_body); - - wcn36xx_dbg(WCN36XX_DBG_HAL, - "hal update scan params channel_count %d\n", - msg_body.channel_count); - - ret = wcn36xx_smd_send_and_wait(wcn, 
msg_body.header.len); - if (ret) { - wcn36xx_err("Sending hal_update_scan_params failed\n"); - goto out; - } - ret = wcn36xx_smd_update_scan_params_rsp(wcn->hal_buf, - wcn->hal_rsp_len); - if (ret) { - wcn36xx_err("hal_update_scan_params response failed err=%d\n", - ret); - goto out; - } -out: - mutex_unlock(&wcn->hal_mutex); - return ret; -} - static int wcn36xx_smd_add_sta_self_rsp(struct wcn36xx *wcn, struct ieee80211_vif *vif, void *buf, diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h index 2c1ed9e570bfd..4e39df5589b31 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.h +++ b/drivers/net/wireless/ath/wcn36xx/smd.h @@ -66,7 +66,6 @@ int wcn36xx_smd_finish_scan(struct wcn36xx *wcn, enum wcn36xx_hal_sys_mode mode, int wcn36xx_smd_init_scan(struct wcn36xx *wcn, enum wcn36xx_hal_sys_mode mode, struct ieee80211_vif *vif); -int wcn36xx_smd_update_scan_params(struct wcn36xx *wcn, u8 *channels, size_t channel_count); int wcn36xx_smd_start_hw_scan(struct wcn36xx *wcn, struct ieee80211_vif *vif, struct cfg80211_scan_request *req); int wcn36xx_smd_stop_hw_scan(struct wcn36xx *wcn); From f35a07a4842a88801d9182b1a76d178bfa616978 Mon Sep 17 00:00:00 2001 From: Kang Yang Date: Tue, 14 Oct 2025 19:07:57 +0800 Subject: [PATCH 023/867] wifi: ath10k: move recovery check logic into a new work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, ath10k has a recovery check logic. It will wait for the last recovery to finish by wait_for_completion_timeout(); But in SDIO scenarios, the recovery function may be invoked from interrupt context, where long blocking waits are undesirable and can lead to system instability. Additionally, Linux’s ordered workqueue processes one task at a time. If a previous recovery is still queued or executing, new triggers are ignored. This prevents accurate tracking of consecutive failures and delays transition to the WEDGED state. 
To address this, move the recovery check logic into a different workqueue. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00288-QCARMSWPZ-1 Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00189 Fixes: c256a94d1b1b ("wifi: ath10k: shutdown driver when hardware is unreliable") Signed-off-by: Kang Yang Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251014110757.155-1-kang.yang@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/core.c | 20 +++++++++----------- drivers/net/wireless/ath/ath10k/core.h | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 6f78f1752cd6f..9ae3595fb6986 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -3,7 +3,6 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ @@ -2493,8 +2492,9 @@ static int ath10k_init_hw_params(struct ath10k *ar) return 0; } -static bool ath10k_core_needs_recovery(struct ath10k *ar) +static void ath10k_core_recovery_check_work(struct work_struct *work) { + struct ath10k *ar = container_of(work, struct ath10k, recovery_check_work); long time_left; /* Sometimes the recovery will fail and then the next all recovery fail, @@ -2504,7 +2504,7 @@ static bool ath10k_core_needs_recovery(struct ath10k *ar) ath10k_err(ar, "consecutive fail %d times, will shutdown driver!", atomic_read(&ar->fail_cont_count)); ar->state = ATH10K_STATE_WEDGED; - return false; + return; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "total recovery count: %d", ++ar->recovery_count); @@ -2518,27 +2518,24 @@ static bool ath10k_core_needs_recovery(struct ath10k *ar) ATH10K_RECOVERY_TIMEOUT_HZ); if (time_left) { ath10k_warn(ar, "previous recovery succeeded, skip this!\n"); - return false; + return; } /* Record the continuous recovery fail count when recovery failed. */ atomic_inc(&ar->fail_cont_count); /* Avoid having multiple recoveries at the same time. 
*/ - return false; + return; } atomic_inc(&ar->pending_recovery); - - return true; + queue_work(ar->workqueue, &ar->restart_work); } void ath10k_core_start_recovery(struct ath10k *ar) { - if (!ath10k_core_needs_recovery(ar)) - return; - - queue_work(ar->workqueue, &ar->restart_work); + /* Use workqueue_aux to avoid blocking recovery tracking */ + queue_work(ar->workqueue_aux, &ar->recovery_check_work); } EXPORT_SYMBOL(ath10k_core_start_recovery); @@ -3734,6 +3731,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); + INIT_WORK(&ar->recovery_check_work, ath10k_core_recovery_check_work); INIT_WORK(&ar->set_coverage_class_work, ath10k_core_set_coverage_class_work); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 8c72ed386edb7..859176fcb5a29 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -3,7 +3,6 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ @@ -1208,6 +1207,7 @@ struct ath10k { struct work_struct register_work; struct work_struct restart_work; + struct work_struct recovery_check_work; struct work_struct bundle_tx_work; struct work_struct tx_complete_work; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 154ac7a709824..da6f7957a0ae7 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3,7 +3,6 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. 
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ @@ -5428,6 +5427,7 @@ static void ath10k_stop(struct ieee80211_hw *hw, bool suspend) cancel_work_sync(&ar->set_coverage_class_work); cancel_delayed_work_sync(&ar->scan.timeout); cancel_work_sync(&ar->restart_work); + cancel_work_sync(&ar->recovery_check_work); } static int ath10k_config_ps(struct ath10k *ar) From 960fc268a9fc269190014773c81507e695bec3d4 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 10 Oct 2025 09:46:27 -0700 Subject: [PATCH 024/867] wifi: ath11k: Remove struct wmi_bcn_send_from_host_cmd struct wmi_bcn_send_from_host_cmd is unused, so remove it. Compile tested only. Link: https://patch.msgid.link/20251010-ath11k-nuke-wmi_bcn_send_from_host_cmd-v1-1-bfb5118d9018@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wmi.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 9fcffaa2f383c..3c4885a12855b 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #ifndef ATH11K_WMI_H @@ -3463,20 +3463,6 @@ struct scan_cancel_param { u32 pdev_id; }; -struct wmi_bcn_send_from_host_cmd { - u32 tlv_header; - u32 vdev_id; - u32 data_len; - union { - u32 frag_ptr; - u32 frag_ptr_lo; - }; - u32 frame_ctrl; - u32 dtim_flag; - u32 bcn_antenna; - u32 frag_ptr_hi; -}; - #define WMI_CHAN_INFO_MODE GENMASK(5, 0) #define WMI_CHAN_INFO_HT40_PLUS BIT(6) #define WMI_CHAN_INFO_PASSIVE BIT(7) From d34a368be24d029544cc97feb87729a9f7984a78 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 10 Oct 2025 09:47:58 -0700 Subject: [PATCH 025/867] wifi: ath12k: Remove struct wmi_bcn_send_from_host_cmd struct wmi_bcn_send_from_host_cmd is unused, so remove it. Compile tested only. Link: https://patch.msgid.link/20251010-ath12k-nuke-wmi_bcn_send_from_host_cmd-v1-1-6f1172b77848@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index a8c3190e8ad95..d9fd6a6b708dd 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #ifndef ATH12K_WMI_H @@ -3609,20 +3609,6 @@ struct ath12k_wmi_scan_cancel_arg { u32 pdev_id; }; -struct wmi_bcn_send_from_host_cmd { - __le32 tlv_header; - __le32 vdev_id; - __le32 data_len; - union { - __le32 frag_ptr; - __le32 frag_ptr_lo; - }; - __le32 frame_ctrl; - __le32 dtim_flag; - __le32 bcn_antenna; - __le32 frag_ptr_hi; -}; - #define WMI_CHAN_INFO_MODE GENMASK(5, 0) #define WMI_CHAN_INFO_HT40_PLUS BIT(6) #define WMI_CHAN_INFO_PASSIVE BIT(7) From 596b911644cc19ecba0dbc9c92849fb59390e29a Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Tue, 14 Oct 2025 10:30:20 +0800 Subject: [PATCH 026/867] wifi: ath11k: restore register window after global reset Hardware target implements an address space larger than the PCI BAR can map. In order to be able to access the whole target address space, the BAR space is split into 4 segments, of which the last 3, called windows, can be dynamically mapped to the desired area. This is achieved by updating window register with appropriate window value. Currently each time when accessing a register that is beyond ATH11K_PCI_WINDOW_START, host calculates the window value and caches it after window update, this way next time when accessing a register falling in the same window, host knows that the window is already good hence no additional update needed. However this mechanism breaks after global reset is triggered in ath11k_pci_soc_global_reset(), because with global reset hardware resets window register hence the window is not properly mapped any more. Current host does nothing about this, as a result a subsequent register access may not work as expected if it falls in a window same as before. Although there is no obvious issue seen now, better to fix it to avoid future problem. The fix is done by restoring the window register after global reset. 
Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.30 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251014-ath11k-reset-window-cache-v1-1-b85271b111dd@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/pci.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index d8655badd96d0..7114eca8810db 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2019-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -177,6 +177,19 @@ static inline void ath11k_pci_select_static_window(struct ath11k_pci *ab_pci) ab_pci->ab->mem + ATH11K_PCI_WINDOW_REG_ADDRESS); } +static void ath11k_pci_restore_window(struct ath11k_base *ab) +{ + struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); + + spin_lock_bh(&ab_pci->window_lock); + + iowrite32(ATH11K_PCI_WINDOW_ENABLE_BIT | ab_pci->register_window, + ab->mem + ATH11K_PCI_WINDOW_REG_ADDRESS); + ioread32(ab->mem + ATH11K_PCI_WINDOW_REG_ADDRESS); + + spin_unlock_bh(&ab_pci->window_lock); +} + static void ath11k_pci_soc_global_reset(struct ath11k_base *ab) { u32 val, delay; @@ -201,6 +214,11 @@ static void ath11k_pci_soc_global_reset(struct ath11k_base *ab) val = ath11k_pcic_read32(ab, PCIE_SOC_GLOBAL_RESET); if (val == 0xffffffff) ath11k_warn(ab, "link down error during global reset\n"); + + /* Restore window register as its content is cleared during + * hardware global reset, such that it aligns with host cache. 
+ */ + ath11k_pci_restore_window(ab); } static void ath11k_pci_clear_dbg_registers(struct ath11k_base *ab) From 36f9edbb9d0fc36c865c74f3c1ad8e1261ad3981 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Tue, 30 Sep 2025 14:45:50 +0530 Subject: [PATCH 027/867] wifi: ath12k: Fix MSDU buffer types handling in RX error path Currently, packets received on the REO exception ring from unassociated peers are of MSDU buffer type, while the driver expects link descriptor type packets. These packets are not parsed further due to a return check on packet type in ath12k_hal_desc_reo_parse_err(), but the associated skb is not freed. This may lead to kernel crashes and buffer leaks. Hence to fix, update the RX error handler to explicitly drop MSDU buffer type packets received on the REO exception ring. This prevents further processing of invalid packets and ensures stability in the RX error handling path. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Sarika Sharma Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250930091551.3305312-2-sarika.sharma@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 70 ++++++++++++++++++++++-- drivers/net/wireless/ath/ath12k/hal_rx.c | 10 +--- 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 5e5c14a70316d..99d29eda26cf1 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include @@ -3781,6 +3781,48 @@ ath12k_dp_process_rx_err_buf(struct ath12k *ar, struct hal_reo_dest_ring *desc, return 0; } +static int ath12k_dp_h_msdu_buffer_type(struct ath12k_base *ab, + struct list_head *list, + struct hal_reo_dest_ring *desc) +{ + struct ath12k_rx_desc_info *desc_info; + struct ath12k_skb_rxcb *rxcb; + struct sk_buff *msdu; + u64 desc_va; + + desc_va = (u64)le32_to_cpu(desc->buf_va_hi) << 32 | + le32_to_cpu(desc->buf_va_lo); + desc_info = (struct ath12k_rx_desc_info *)(uintptr_t)desc_va; + if (!desc_info) { + u32 cookie; + + cookie = le32_get_bits(desc->buf_addr_info.info1, + BUFFER_ADDR_INFO1_SW_COOKIE); + desc_info = ath12k_dp_get_rx_desc(ab, cookie); + if (!desc_info) { + ath12k_warn(ab, "Invalid cookie in manual descriptor retrieval: 0x%x\n", + cookie); + return -EINVAL; + } + } + + if (desc_info->magic != ATH12K_DP_RX_DESC_MAGIC) { + ath12k_warn(ab, "rx exception, magic check failed with value: %u\n", + desc_info->magic); + return -EINVAL; + } + + msdu = desc_info->skb; + desc_info->skb = NULL; + list_add_tail(&desc_info->list, list); + rxcb = ATH12K_SKB_RXCB(msdu); + dma_unmap_single(ab->dev, rxcb->paddr, msdu->len + skb_tailroom(msdu), + DMA_FROM_DEVICE); + dev_kfree_skb_any(msdu); + + return 0; +} + int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, int budget) { @@ -3825,6 +3867,26 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, drop = false; ab->device_stats.err_ring_pkts++; + hw_link_id = le32_get_bits(reo_desc->info0, + HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); + device_id = hw_links[hw_link_id].device_id; + partner_ab = ath12k_ag_to_ab(ag, device_id); + + /* Below case is added to handle data packet from un-associated clients. + * As it is expected that AST lookup will fail for + * un-associated station's data packets. 
+ */ + if (le32_get_bits(reo_desc->info0, HAL_REO_DEST_RING_INFO0_BUFFER_TYPE) == + HAL_REO_DEST_RING_BUFFER_TYPE_MSDU) { + if (!ath12k_dp_h_msdu_buffer_type(partner_ab, + &rx_desc_used_list[device_id], + reo_desc)) { + num_buffs_reaped[device_id]++; + tot_n_bufs_reaped++; + } + goto next_desc; + } + ret = ath12k_hal_desc_reo_parse_err(ab, reo_desc, &paddr, &desc_bank); if (ret) { @@ -3833,11 +3895,6 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, continue; } - hw_link_id = le32_get_bits(reo_desc->info0, - HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); - device_id = hw_links[hw_link_id].device_id; - partner_ab = ath12k_ag_to_ab(ag, device_id); - pdev_id = ath12k_hw_mac_id_to_pdev_id(partner_ab->hw_params, hw_links[hw_link_id].pdev_idx); ar = partner_ab->pdevs[pdev_id].ar; @@ -3886,6 +3943,7 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, } } +next_desc: if (tot_n_bufs_reaped >= quota) { tot_n_bufs_reaped = quota; goto exit; diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.c b/drivers/net/wireless/ath/ath12k/hal_rx.c index 669096278fdd4..c4443ca05cd65 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.c +++ b/drivers/net/wireless/ath/ath12k/hal_rx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include "debug.h" @@ -323,7 +323,7 @@ int ath12k_hal_desc_reo_parse_err(struct ath12k_base *ab, { enum hal_reo_dest_ring_push_reason push_reason; enum hal_reo_dest_ring_error_code err_code; - u32 cookie, val; + u32 cookie; push_reason = le32_get_bits(desc->info0, HAL_REO_DEST_RING_INFO0_PUSH_REASON); @@ -338,12 +338,6 @@ int ath12k_hal_desc_reo_parse_err(struct ath12k_base *ab, return -EINVAL; } - val = le32_get_bits(desc->info0, HAL_REO_DEST_RING_INFO0_BUFFER_TYPE); - if (val != HAL_REO_DEST_RING_BUFFER_TYPE_LINK_DESC) { - ath12k_warn(ab, "expected buffer type link_desc"); - return -EINVAL; - } - ath12k_hal_rx_reo_ent_paddr_get(ab, &desc->buf_addr_info, paddr, &cookie); *desc_bank = u32_get_bits(cookie, DP_LINK_DESC_BANK_MASK); From 43ba986e7ac7d9420e26e9a9b03c73054bc2149c Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Tue, 30 Sep 2025 14:45:51 +0530 Subject: [PATCH 028/867] wifi: ath12k: track dropped MSDU buffer type packets in REO exception ring Add a counter "reo_excep_msdu_buf_type" in ath12k_debugfs_dump_device_dp_stats() to account for packets dropped due to unexpected MSDU buffer types in the RX error path. These packets are discarded to prevent incorrect parsing and potential kernel crashes. This helps in debugging and monitoring RX error handling behavior. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Sarika Sharma Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250930091551.3305312-3-sarika.sharma@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/debugfs.c | 5 ++++- drivers/net/wireless/ath/ath12k/dp_rx.c | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 3d1956966a485..48d95ea7b3dbc 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -963,6 +963,7 @@ struct ath12k_device_dp_stats { u32 tx_wbm_rel_source[HAL_WBM_REL_SRC_MODULE_MAX]; u32 tx_enqueued[DP_TCL_NUM_RING_MAX]; u32 tx_completed[DP_TCL_NUM_RING_MAX]; + u32 reo_excep_msdu_buf_type; }; struct ath12k_reg_freq { diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index 16601a8c36448..15219429d4ed8 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include "core.h" @@ -1178,6 +1178,9 @@ static ssize_t ath12k_debugfs_dump_device_dp_stats(struct file *file, len += scnprintf(buf + len, size - len, "\n"); } + len += scnprintf(buf + len, size - len, "\nREO excep MSDU buf type:%u\n", + device_stats->reo_excep_msdu_buf_type); + len += scnprintf(buf + len, size - len, "\nRx WBM REL SRC Errors:\n"); for (i = 0; i < HAL_WBM_REL_SRC_MODULE_MAX; i++) { diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 99d29eda26cf1..6c9f0839c83a3 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -3790,6 +3790,8 @@ static int ath12k_dp_h_msdu_buffer_type(struct ath12k_base *ab, struct sk_buff *msdu; u64 desc_va; + ab->device_stats.reo_excep_msdu_buf_type++; + desc_va = (u64)le32_to_cpu(desc->buf_va_hi) << 32 | le32_to_cpu(desc->buf_va_lo); desc_info = (struct ath12k_rx_desc_info *)(uintptr_t)desc_va; From 6917e268c4338ceb5916c8695423597ed8c8b38e Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 24 Sep 2025 19:13:36 +0530 Subject: [PATCH 029/867] wifi: ath12k: Defer vdev bring-up until CSA finalize to avoid stale beacon Mac80211 schedules CSA finalize work twice during a channel switch: first during the reserved switch phase and again during the finalize phase. The beacon content is updated only during the second schedule, which occurs after the reserved switch completes. However, the ath12k driver attempts to bring up the VDEV during the channel switch callback (ath12k_mac_update_vif_chan()), which leads to premature installation of stale beacon templates before the updated content is available. This premature VDEV bring-up causes outdated beacon information to be broadcast, which can result in updated channel parameters during the transition. In MBSSID scenarios, this behavior is particularly problematic because the transmitting interface's beacon must be updated before non-transmitting interfaces are brought up. 
Failing to do so can lead to beacon mismatches across interfaces. Introduce an is_csa_in_progress flag to defer VDEV_UP until CSA finalize is complete. Set this flag during the channel switch callback when CSA is active. In bss_info_changed(), check this flag and issue VDEV_UP only after the beacon template has been updated. Ensure that in MBSSID cases, the transmitting interface is brought up first, followed by all non-transmitting interfaces. This ordering ensures correct beacon propagation and avoids stale beacon installation during CSA transitions. Additionally, move the call to ath12k_mac_update_peer_puncturing_width() before VDEV bring-up during CSA handling. This ensures that the puncturing bitmap and bandwidth settings are applied before the VDEV is brought up. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Fixes: 8c6faa56bfb2 ("wifi: ath12k: add MBSSID beacon support") Signed-off-by: Aditya Kumar Singh Signed-off-by: Maharaja Kennadyrajan Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250924134336.888-1-maharaja.kennadyrajan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/mac.c | 90 ++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 48d95ea7b3dbc..02a32b9f3ac29 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -355,6 +355,7 @@ struct ath12k_link_vif { struct wmi_vdev_install_key_arg group_key; bool pairwise_key_done; u16 num_stations; + bool is_csa_in_progress; }; struct ath12k_vif { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 1d7b60aa5cb09..84473dbf22e13 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4221,6 +4221,30 @@ static bool 
ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif, chandef->chan->band == NL80211_BAND_6GHZ; } +static void ath12k_wmi_vdev_params_up(struct ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_vif *tx_arvif, + struct ieee80211_bss_conf *info, u16 aid) +{ + struct ath12k_wmi_vdev_up_params params = { + .vdev_id = arvif->vdev_id, + .aid = aid, + .bssid = arvif->bssid + }; + int ret; + + if (tx_arvif) { + params.tx_bssid = tx_arvif->bssid; + params.nontx_profile_idx = info->bssid_index; + params.nontx_profile_cnt = 1 << info->bssid_indicator; + } + + ret = ath12k_wmi_vdev_up(arvif->ar, &params); + if (ret) + ath12k_warn(ar->ab, "failed to bring vdev up %d: %d\n", + arvif->vdev_id, ret); +} + static void ath12k_mac_bss_info_changed(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ieee80211_bss_conf *info, @@ -4228,6 +4252,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ath12k_link_vif *tx_arvif; struct cfg80211_chan_def def; u32 param_id, param_value; enum nl80211_band band; @@ -4236,9 +4261,9 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, u32 preamble; u16 hw_value; u16 bitrate; - int ret; u8 rateidx; u32 rate; + int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -4271,12 +4296,41 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, "Set burst beacon mode for VDEV: %d\n", arvif->vdev_id); + /* In MBSSID case, need to install transmitting VIF's template first */ + ret = ath12k_mac_setup_bcn_tmpl(arvif); if (ret) ath12k_warn(ar->ab, "failed to update bcn template: %d\n", ret); + + if (!arvif->is_csa_in_progress) + goto skip_vdev_up; + + tx_arvif = ath12k_mac_get_tx_arvif(arvif, info); + if (tx_arvif && arvif != tx_arvif && tx_arvif->is_csa_in_progress) + /* skip non tx vif's */ + goto skip_vdev_up; + + ath12k_wmi_vdev_params_up(ar, arvif, tx_arvif, info, ahvif->aid); + + 
arvif->is_csa_in_progress = false; + + if (tx_arvif && arvif == tx_arvif) { + struct ath12k_link_vif *arvif_itr; + + list_for_each_entry(arvif_itr, &ar->arvifs, list) { + if (!arvif_itr->is_csa_in_progress) + continue; + + ath12k_wmi_vdev_params_up(ar, arvif, tx_arvif, + info, ahvif->aid); + arvif_itr->is_csa_in_progress = false; + } + } } +skip_vdev_up: + if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { arvif->dtim_period = info->dtim_period; @@ -10862,9 +10916,9 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, int n_vifs) { struct ath12k_wmi_vdev_up_params params = {}; - struct ath12k_link_vif *arvif; struct ieee80211_bss_conf *link_conf; struct ath12k_base *ab = ar->ab; + struct ath12k_link_vif *arvif; struct ieee80211_vif *vif; struct ath12k_vif *ahvif; u8 link_id; @@ -10925,6 +10979,28 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, continue; } + ret = ath12k_mac_update_peer_puncturing_width(arvif->ar, arvif, + vifs[i].new_ctx->def); + if (ret) { + ath12k_warn(ar->ab, + "failed to update puncturing bitmap %02x and width %d: %d\n", + vifs[i].new_ctx->def.punctured, + vifs[i].new_ctx->def.width, ret); + continue; + } + + /* Defer VDEV bring-up during CSA to avoid installing stale + * beacon templates. The beacon content is updated only + * after CSA finalize, so we mark CSA in progress and skip + * VDEV_UP for now. It will be handled later in + * bss_info_changed(). 
+ */ + if (link_conf->csa_active && + arvif->ahvif->vdev_type == WMI_VDEV_TYPE_AP) { + arvif->is_csa_in_progress = true; + continue; + } + ret = ath12k_mac_setup_bcn_tmpl(arvif); if (ret) ath12k_warn(ab, "failed to update bcn tmpl during csa: %d\n", @@ -10945,16 +11021,6 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, arvif->vdev_id, ret); continue; } - - ret = ath12k_mac_update_peer_puncturing_width(arvif->ar, arvif, - vifs[i].new_ctx->def); - if (ret) { - ath12k_warn(ar->ab, - "failed to update puncturing bitmap %02x and width %d: %d\n", - vifs[i].new_ctx->def.punctured, - vifs[i].new_ctx->def.width, ret); - continue; - } } /* Restart the internal monitor vdev on new channel */ From b94f523cc5a19108ff4687a4bce9e5d484f0f9c5 Mon Sep 17 00:00:00 2001 From: Thiraviyam Mariyappan Date: Tue, 7 Oct 2025 19:03:32 +0530 Subject: [PATCH 030/867] wifi: ath12k: Fix NSS value update in ext_rx_stats Currently, in ext_rx_stats, the NSS value is taken directly from the firmware, which results in incorrect mapping: 4x4, 3x3, 2x2, 1x1 SS are incorrectly updated as 3x3, 2x2, 1x1, 0x0 SS respectively. Fix the issue by incrementing the NSS value by 1 while updating the PPDU info to ensure accurate spatial stream statistics. Remove the redundant +1 increment in the radiotap header when monitor mode is enabled to prevent double counting. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Signed-off-by: Thiraviyam Mariyappan Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251007133332.1092178-1-thiraviyam.mariyappan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_mon.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 009c495021489..39d1967584db8 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "dp_mon.h" @@ -105,7 +105,7 @@ static void ath12k_dp_mon_parse_vht_sig_a(const struct hal_rx_vht_sig_a_info *vh if (ppdu_info->is_stbc && nsts > 0) nsts = ((nsts + 1) >> 1) - 1; - ppdu_info->nss = u32_get_bits(nsts, VHT_SIG_SU_NSS_MASK); + ppdu_info->nss = u32_get_bits(nsts, VHT_SIG_SU_NSS_MASK) + 1; ppdu_info->bw = u32_get_bits(info0, HAL_RX_VHT_SIG_A_INFO_INFO0_BW); ppdu_info->beamformed = u32_get_bits(info1, HAL_RX_VHT_SIG_A_INFO_INFO1_BEAMFORMED); @@ -129,7 +129,7 @@ static void ath12k_dp_mon_parse_ht_sig(const struct hal_rx_ht_sig_info *ht_sig, ppdu_info->is_stbc = u32_get_bits(info1, HAL_RX_HT_SIG_INFO_INFO1_STBC); ppdu_info->ldpc = u32_get_bits(info1, HAL_RX_HT_SIG_INFO_INFO1_FEC_CODING); ppdu_info->gi = u32_get_bits(info1, HAL_RX_HT_SIG_INFO_INFO1_GI); - ppdu_info->nss = (ppdu_info->mcs >> 3); + ppdu_info->nss = (ppdu_info->mcs >> 3) + 1; } static void ath12k_dp_mon_parse_l_sig_b(const struct hal_rx_lsig_b_info *lsigb, @@ -233,7 +233,9 @@ ath12k_dp_mon_parse_he_sig_b2_ofdma(const struct hal_rx_he_sig_b2_ofdma_info *of value = value << HE_STA_ID_SHIFT; 
ppdu_info->he_data4 |= value; - ppdu_info->nss = u32_get_bits(info0, HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_NSTS); + ppdu_info->nss = + u32_get_bits(info0, + HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_NSTS) + 1; ppdu_info->beamformed = u32_get_bits(info0, HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_TXBF); } @@ -261,7 +263,9 @@ ath12k_dp_mon_parse_he_sig_b2_mu(const struct hal_rx_he_sig_b2_mu_info *he_sig_b value = value << HE_STA_ID_SHIFT; ppdu_info->he_data4 |= value; - ppdu_info->nss = u32_get_bits(info0, HAL_RX_HE_SIG_B2_MU_INFO_INFO0_STA_NSTS); + ppdu_info->nss = + u32_get_bits(info0, + HAL_RX_HE_SIG_B2_MU_INFO_INFO0_STA_NSTS) + 1; } static void @@ -553,7 +557,8 @@ static void ath12k_dp_mon_parse_he_sig_su(const struct hal_rx_he_sig_a_su_info * ppdu_info->is_stbc = u32_get_bits(info1, HAL_RX_HE_SIG_A_SU_INFO_INFO1_STBC); ppdu_info->beamformed = u32_get_bits(info1, HAL_RX_HE_SIG_A_SU_INFO_INFO1_TXBF); dcm = u32_get_bits(info0, HAL_RX_HE_SIG_A_SU_INFO_INFO0_DCM); - ppdu_info->nss = u32_get_bits(info0, HAL_RX_HE_SIG_A_SU_INFO_INFO0_NSTS); + ppdu_info->nss = u32_get_bits(info0, + HAL_RX_HE_SIG_A_SU_INFO_INFO0_NSTS) + 1; ppdu_info->dcm = dcm; } @@ -2179,7 +2184,7 @@ static void ath12k_dp_mon_update_radiotap(struct ath12k *ar, spin_unlock_bh(&ar->data_lock); rxs->flag |= RX_FLAG_MACTIME_START; - rxs->nss = ppduinfo->nss + 1; + rxs->nss = ppduinfo->nss; if (test_bit(WMI_TLV_SERVICE_HW_DB2DBM_CONVERSION_SUPPORT, ar->ab->wmi_ab.svc_map)) rxs->signal = ppduinfo->rssi_comb; From 8c21b32c2cc82224c7fc1a9f67318f3b1199744b Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 9 Oct 2025 14:16:55 -0700 Subject: [PATCH 031/867] wifi: ath12k: fix VHT MCS assignment While associating, firmware needs the peer's receive capability to calculate its own VHT transmit MCS. Currently, the host sends this information via mcs->rx_mcs_set field, but firmware actually reads it from mcs->tx_mcs_set field. This mismatch is incorrect. 
This issue has not caused failures so far because most peers advertise identical TX and RX capabilities. Fix this by assigning the value to tx_mcs_set as expected. Additionally, the rate control mask is intended to limit our transmit MCS, so it should also apply to the peer's receive capability. Update the logic accordingly. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Baochen Qiang Signed-off-by: Pradeep Kumar Chitrapu Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251009211656.2386085-2-quic_pradeepc@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 7 +++---- drivers/net/wireless/ath/ath12k/wmi.c | 13 ++++++++----- drivers/net/wireless/ath/ath12k/wmi.h | 2 ++ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 84473dbf22e13..3037076060760 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -2249,7 +2249,6 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, struct cfg80211_chan_def def; enum nl80211_band band; u16 *vht_mcs_mask; - u16 tx_mcs_map; u8 ampdu_factor; u8 max_nss, vht_mcs; int i, vht_nss, nss_idx; @@ -2340,10 +2339,10 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, arg->peer_nss = min(link_sta->rx_nss, max_nss); arg->rx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.rx_highest); arg->rx_mcs_set = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); - arg->tx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.tx_highest); + arg->rx_mcs_set = ath12k_peer_assoc_h_vht_limit(arg->rx_mcs_set, vht_mcs_mask); - tx_mcs_map = __le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); - arg->tx_mcs_set = ath12k_peer_assoc_h_vht_limit(tx_mcs_map, vht_mcs_mask); + arg->tx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.tx_highest); + arg->tx_mcs_set = 
__le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); /* In QCN9274 platform, VHT MCS rate 10 and 11 is enabled by default. * VHT MCS rate 10 and 11 is not supported in 11ac standard. diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index ff6b3d4ea8208..e76275bd6916f 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include #include @@ -2367,10 +2367,13 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, cmd->peer_bw_rxnss_override |= cpu_to_le32(arg->peer_bw_rxnss_override); if (arg->vht_capable) { - mcs->rx_max_rate = cpu_to_le32(arg->rx_max_rate); - mcs->rx_mcs_set = cpu_to_le32(arg->rx_mcs_set); - mcs->tx_max_rate = cpu_to_le32(arg->tx_max_rate); - mcs->tx_mcs_set = cpu_to_le32(arg->tx_mcs_set); + /* Firmware interprets mcs->tx_mcs_set field as peer's + * RX capability + */ + mcs->rx_max_rate = cpu_to_le32(arg->tx_max_rate); + mcs->rx_mcs_set = cpu_to_le32(arg->tx_mcs_set); + mcs->tx_max_rate = cpu_to_le32(arg->rx_max_rate); + mcs->tx_mcs_set = cpu_to_le32(arg->rx_mcs_set); } /* HE Rates */ diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index d9fd6a6b708dd..64bd968989c84 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4204,8 +4204,10 @@ struct wmi_unit_test_cmd { struct ath12k_wmi_vht_rate_set_params { __le32 tlv_header; __le32 rx_max_rate; + /* MCS at which the peer can transmit */ __le32 rx_mcs_set; __le32 tx_max_rate; + /* MCS at which the peer can receive */ __le32 tx_mcs_set; __le32 tx_max_mcs_nss; } __packed; From 9c5f229b1312a31aff762b2111f6751e4e3722fe Mon Sep 17 00:00:00 2001 From: Pradeep 
Kumar Chitrapu Date: Thu, 9 Oct 2025 14:16:56 -0700 Subject: [PATCH 032/867] wifi: ath12k: fix TX and RX MCS rate configurations in HE mode Currently, the TX and RX MCS rate configurations per peer are reversed when sent to the firmware. As a result, RX MCS rates are configured for TX, and vice versa. This commit rectifies the configuration to match what the firmware expects. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Pradeep Kumar Chitrapu Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251009211656.2386085-3-quic_pradeepc@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 3037076060760..d7bc19cea2a64 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -2624,9 +2624,10 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, switch (link_sta->bandwidth) { case IEEE80211_STA_RX_BW_160: v = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); + v = ath12k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_rx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_160] = v; - v = ath12k_peer_assoc_h_he_limit(v, he_mcs_mask); + v = le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_160); arg->peer_he_tx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_160] = v; arg->peer_he_mcs_count++; @@ -2636,10 +2637,10 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, default: v = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); + v = ath12k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_rx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_80] = v; v = le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80); - v = ath12k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_tx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_80] = v; arg->peer_he_mcs_count++; 
From e5b670e5439bda09ea7e3dd3dd32edb2f367c0d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 14:06:05 +0000 Subject: [PATCH 033/867] net: remove obsolete WARN_ON(refcount_read(&sk->sk_refcnt) == 1) sk->sk_refcnt has been converted to refcount_t in 2017. __sock_put(sk) being refcount_dec(&sk->sk_refcnt), it will complain loudly if the current refcnt is 1 (or less) in a non racy way. We can remove four WARN_ON() in favor of the generic refcount_dec() check. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Xuanqiang Luo Link: https://patch.msgid.link/20251014140605.2982703-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 12 ++++-------- net/netlink/af_netlink.c | 4 +--- net/tipc/socket.c | 4 +--- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index f0d00928db9e9..30ac2eb4ef9bf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -830,11 +830,9 @@ static inline bool sk_del_node_init(struct sock *sk) { bool rc = __sk_del_node_init(sk); - if (rc) { - /* paranoid for a while -acme */ - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + if (rc) __sock_put(sk); - } + return rc; } #define sk_del_node_init_rcu(sk) sk_del_node_init(sk) @@ -852,11 +850,9 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) { bool rc = __sk_nulls_del_node_init_rcu(sk); - if (rc) { - /* paranoid for a while -acme */ - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + if (rc) __sock_put(sk); - } + return rc; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2b46c0cd752a3..687a84c48882a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -596,10 +596,8 @@ static void netlink_remove(struct sock *sk) table = &nl_table[sk->sk_protocol]; if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, - netlink_rhashtable_params)) { - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + netlink_rhashtable_params)) __sock_put(sk); - 
} netlink_table_grab(); if (nlk_sk(sk)->subscriptions) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1574a83384f88..bc614a1f019c2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3031,10 +3031,8 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct sock *sk = &tsk->sk; struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) __sock_put(sk); - } } static const struct rhashtable_params tsk_rht_params = { From e1f5bb196f0b0eee197e06d361f8ac5f091c2963 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 13 Oct 2025 10:23:06 -0700 Subject: [PATCH 034/867] net: bcmasp: Add support for PHY-based Wake-on-LAN If available, interrogate the PHY to find out whether we can use it for Wake-on-LAN. This can be a more power efficient way of implementing that feature, especially when the MAC is powered off in low power states. 
Signed-off-by: Florian Fainelli Link: https://patch.msgid.link/20251013172306.2250223-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- .../ethernet/broadcom/asp2/bcmasp_ethtool.c | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c index 63f1a8c3a7fbe..dd80ccfca19d5 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c @@ -163,11 +163,30 @@ static void bcmasp_set_msglevel(struct net_device *dev, u32 level) static void bcmasp_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmasp_intf *intf = netdev_priv(dev); + struct bcmasp_priv *priv = intf->parent; + struct device *kdev = &priv->pdev->dev; + u32 phy_wolopts = 0; + + if (dev->phydev) { + phy_ethtool_get_wol(dev->phydev, wol); + phy_wolopts = wol->wolopts; + } + + /* MAC is not wake-up capable, return what the PHY does */ + if (!device_can_wakeup(kdev)) + return; + + /* Overlay MAC capabilities with that of the PHY queried before */ + wol->supported |= BCMASP_SUPPORTED_WAKE; + wol->wolopts |= intf->wolopts; + + /* Return the PHY configured magic password */ + if (phy_wolopts & WAKE_MAGICSECURE) + return; - wol->supported = BCMASP_SUPPORTED_WAKE; - wol->wolopts = intf->wolopts; memset(wol->sopass, 0, sizeof(wol->sopass)); + /* Otherwise the MAC one */ if (wol->wolopts & WAKE_MAGICSECURE) memcpy(wol->sopass, intf->sopass, sizeof(intf->sopass)); } @@ -177,10 +196,21 @@ static int bcmasp_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmasp_intf *intf = netdev_priv(dev); struct bcmasp_priv *priv = intf->parent; struct device *kdev = &priv->pdev->dev; + int ret = 0; + + /* Try Wake-on-LAN from the PHY first */ + if (dev->phydev) { + ret = phy_ethtool_set_wol(dev->phydev, wol); + if (ret != -EOPNOTSUPP && wol->wolopts) + return ret; + } if 
(!device_can_wakeup(kdev)) return -EOPNOTSUPP; + if (wol->wolopts & ~BCMASP_SUPPORTED_WAKE) + return -EINVAL; + /* Interface Specific */ intf->wolopts = wol->wolopts; if (intf->wolopts & WAKE_MAGICSECURE) From f197902cd21ae833850679b216bb62c0d056bbb3 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Mon, 13 Oct 2025 16:05:31 +0200 Subject: [PATCH 035/867] net: pse-pd: pd692x0: Replace __free macro with explicit kfree calls Replace __free(kfree) with explicit kfree() calls to follow the net subsystem policy of avoiding automatic cleanup macros as described in the documentation. Signed-off-by: Kory Maincent Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013-feature_pd692x0_reboot_keep_conf-v2-1-68ab082a93dd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index f4e91ba64a666..055e925c853ef 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1200,9 +1200,9 @@ static void pd692x0_managers_free_pw_budget(struct pd692x0_priv *priv) static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) { - struct pd692x0_manager *manager __free(kfree) = NULL; struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]; + struct pd692x0_manager *manager; int ret, nmanagers; /* Should we flash the port matrix */ @@ -1216,7 +1216,7 @@ static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) ret = pd692x0_of_get_managers(priv, manager); if (ret < 0) - return ret; + goto err_free_manager; nmanagers = ret; ret = pd692x0_register_managers_regulator(priv, manager, nmanagers); @@ -1236,12 +1236,15 @@ static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) goto err_managers_req_pw; pd692x0_of_put_managers(priv, manager, nmanagers); + kfree(manager); return 0; err_managers_req_pw: 
pd692x0_managers_free_pw_budget(priv); err_of_managers: pd692x0_of_put_managers(priv, manager, nmanagers); +err_free_manager: + kfree(manager); return ret; } From 6fa1f8b64a47edd7d8420d8fd1008507aee2853e Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Mon, 13 Oct 2025 16:05:32 +0200 Subject: [PATCH 036/867] net: pse-pd: pd692x0: Separate configuration parsing from hardware setup Cache the port matrix configuration in driver private data to enable PSE controller reconfiguration. This refactoring separates device tree parsing from hardware configuration application, allowing settings to be reapplied without reparsing the device tree. This refactoring is a prerequisite for preserving PSE configuration across reboots to prevent power disruption to connected devices. Signed-off-by: Kory Maincent Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013-feature_pd692x0_reboot_keep_conf-v2-2-68ab082a93dd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 115 +++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 39 deletions(-) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 055e925c853ef..782b1abf94cb1 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -85,6 +85,11 @@ enum { PD692X0_MSG_CNT }; +struct pd692x0_matrix { + u8 hw_port_a; + u8 hw_port_b; +}; + struct pd692x0_priv { struct i2c_client *client; struct pse_controller_dev pcdev; @@ -101,6 +106,8 @@ struct pd692x0_priv { enum ethtool_c33_pse_admin_state admin_state[PD692X0_MAX_PIS]; struct regulator_dev *manager_reg[PD692X0_MAX_MANAGERS]; int manager_pw_budget[PD692X0_MAX_MANAGERS]; + int nmanagers; + struct pd692x0_matrix *port_matrix; }; /* Template list of communication messages. 
The non-null bytes defined here @@ -809,11 +816,6 @@ struct pd692x0_manager { int nports; }; -struct pd692x0_matrix { - u8 hw_port_a; - u8 hw_port_b; -}; - static int pd692x0_of_get_ports_manager(struct pd692x0_priv *priv, struct pd692x0_manager *manager, @@ -903,7 +905,8 @@ pd692x0_of_get_managers(struct pd692x0_priv *priv, } of_node_put(managers_node); - return nmanagers; + priv->nmanagers = nmanagers; + return 0; out: for (i = 0; i < nmanagers; i++) { @@ -963,8 +966,7 @@ pd692x0_register_manager_regulator(struct device *dev, char *reg_name, static int pd692x0_register_managers_regulator(struct pd692x0_priv *priv, - const struct pd692x0_manager *manager, - int nmanagers) + const struct pd692x0_manager *manager) { struct device *dev = &priv->client->dev; size_t reg_name_len; @@ -975,7 +977,7 @@ pd692x0_register_managers_regulator(struct pd692x0_priv *priv, */ reg_name_len = strlen(dev_name(dev)) + 23; - for (i = 0; i < nmanagers; i++) { + for (i = 0; i < priv->nmanagers; i++) { static const char * const regulators[] = { "vaux5", "vaux3p3" }; struct regulator_dev *rdev; char *reg_name; @@ -1008,10 +1010,14 @@ pd692x0_register_managers_regulator(struct pd692x0_priv *priv, } static int -pd692x0_conf_manager_power_budget(struct pd692x0_priv *priv, int id, int pw) +pd692x0_conf_manager_power_budget(struct pd692x0_priv *priv, int id) { struct pd692x0_msg msg, buf; - int ret, pw_mW = pw / 1000; + int ret, pw_mW; + + pw_mW = priv->manager_pw_budget[id] / 1000; + if (!pw_mW) + return 0; msg = pd692x0_msg_template_list[PD692X0_MSG_GET_POWER_BANK]; msg.data[0] = id; @@ -1032,11 +1038,11 @@ pd692x0_conf_manager_power_budget(struct pd692x0_priv *priv, int id, int pw) } static int -pd692x0_configure_managers(struct pd692x0_priv *priv, int nmanagers) +pd692x0_req_managers_pw_budget(struct pd692x0_priv *priv) { int i, ret; - for (i = 0; i < nmanagers; i++) { + for (i = 0; i < priv->nmanagers; i++) { struct regulator *supply = priv->manager_reg[i]->supply; int pw_budget; @@ 
-1053,7 +1059,18 @@ pd692x0_configure_managers(struct pd692x0_priv *priv, int nmanagers) return ret; priv->manager_pw_budget[i] = pw_budget; - ret = pd692x0_conf_manager_power_budget(priv, i, pw_budget); + } + + return 0; +} + +static int +pd692x0_configure_managers(struct pd692x0_priv *priv) +{ + int i, ret; + + for (i = 0; i < priv->nmanagers; i++) { + ret = pd692x0_conf_manager_power_budget(priv, i); if (ret < 0) return ret; } @@ -1101,10 +1118,9 @@ pd692x0_set_port_matrix(const struct pse_pi_pairset *pairset, static int pd692x0_set_ports_matrix(struct pd692x0_priv *priv, - const struct pd692x0_manager *manager, - int nmanagers, - struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]) + const struct pd692x0_manager *manager) { + struct pd692x0_matrix *port_matrix = priv->port_matrix; struct pse_controller_dev *pcdev = &priv->pcdev; int i, ret; @@ -1117,7 +1133,7 @@ pd692x0_set_ports_matrix(struct pd692x0_priv *priv, /* Update with values for every PSE PIs */ for (i = 0; i < pcdev->nr_lines; i++) { ret = pd692x0_set_port_matrix(&pcdev->pi[i].pairset[0], - manager, nmanagers, + manager, priv->nmanagers, &port_matrix[i]); if (ret) { dev_err(&priv->client->dev, @@ -1126,7 +1142,7 @@ pd692x0_set_ports_matrix(struct pd692x0_priv *priv, } ret = pd692x0_set_port_matrix(&pcdev->pi[i].pairset[1], - manager, nmanagers, + manager, priv->nmanagers, &port_matrix[i]); if (ret) { dev_err(&priv->client->dev, @@ -1139,9 +1155,9 @@ pd692x0_set_ports_matrix(struct pd692x0_priv *priv, } static int -pd692x0_write_ports_matrix(struct pd692x0_priv *priv, - const struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]) +pd692x0_write_ports_matrix(struct pd692x0_priv *priv) { + struct pd692x0_matrix *port_matrix = priv->port_matrix; struct pd692x0_msg msg, buf; int ret, i; @@ -1166,13 +1182,32 @@ pd692x0_write_ports_matrix(struct pd692x0_priv *priv, return 0; } +static int pd692x0_hw_conf_init(struct pd692x0_priv *priv) +{ + int ret; + + /* Is PD692x0 ready to be configured? 
*/ + if (priv->fw_state != PD692X0_FW_OK && + priv->fw_state != PD692X0_FW_COMPLETE) + return 0; + + ret = pd692x0_configure_managers(priv); + if (ret) + return ret; + + ret = pd692x0_write_ports_matrix(priv); + if (ret) + return ret; + + return 0; +} + static void pd692x0_of_put_managers(struct pd692x0_priv *priv, - struct pd692x0_manager *manager, - int nmanagers) + struct pd692x0_manager *manager) { int i, j; - for (i = 0; i < nmanagers; i++) { + for (i = 0; i < priv->nmanagers; i++) { for (j = 0; j < manager[i].nports; j++) of_node_put(manager[i].port_node[j]); of_node_put(manager[i].node); @@ -1201,48 +1236,50 @@ static void pd692x0_managers_free_pw_budget(struct pd692x0_priv *priv) static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) { struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); - struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]; + struct pd692x0_matrix *port_matrix; struct pd692x0_manager *manager; - int ret, nmanagers; - - /* Should we flash the port matrix */ - if (priv->fw_state != PD692X0_FW_OK && - priv->fw_state != PD692X0_FW_COMPLETE) - return 0; + int ret; manager = kcalloc(PD692X0_MAX_MANAGERS, sizeof(*manager), GFP_KERNEL); if (!manager) return -ENOMEM; + port_matrix = devm_kcalloc(&priv->client->dev, PD692X0_MAX_PIS, + sizeof(*port_matrix), GFP_KERNEL); + if (!port_matrix) { + ret = -ENOMEM; + goto err_free_manager; + } + priv->port_matrix = port_matrix; + ret = pd692x0_of_get_managers(priv, manager); if (ret < 0) goto err_free_manager; - nmanagers = ret; - ret = pd692x0_register_managers_regulator(priv, manager, nmanagers); + ret = pd692x0_register_managers_regulator(priv, manager); if (ret) goto err_of_managers; - ret = pd692x0_configure_managers(priv, nmanagers); + ret = pd692x0_req_managers_pw_budget(priv); if (ret) goto err_of_managers; - ret = pd692x0_set_ports_matrix(priv, manager, nmanagers, port_matrix); + ret = pd692x0_set_ports_matrix(priv, manager); if (ret) goto err_managers_req_pw; - ret = 
pd692x0_write_ports_matrix(priv, port_matrix); + ret = pd692x0_hw_conf_init(priv); if (ret) goto err_managers_req_pw; - pd692x0_of_put_managers(priv, manager, nmanagers); + pd692x0_of_put_managers(priv, manager); kfree(manager); return 0; err_managers_req_pw: pd692x0_managers_free_pw_budget(priv); err_of_managers: - pd692x0_of_put_managers(priv, manager, nmanagers); + pd692x0_of_put_managers(priv, manager); err_free_manager: kfree(manager); return ret; @@ -1647,7 +1684,7 @@ static enum fw_upload_err pd692x0_fw_poll_complete(struct fw_upload *fwl) return FW_UPLOAD_ERR_FW_INVALID; } - ret = pd692x0_setup_pi_matrix(&priv->pcdev); + ret = pd692x0_hw_conf_init(priv); if (ret < 0) { dev_err(&client->dev, "Error configuring ports matrix (%pe)\n", ERR_PTR(ret)); From 8f3d044b34fe99b894046edb84605456195cabc0 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Mon, 13 Oct 2025 16:05:33 +0200 Subject: [PATCH 037/867] net: pse-pd: pd692x0: Preserve PSE configuration across reboots Detect when PSE hardware is already configured (user byte == 42) and skip hardware initialization to prevent power interruption to connected devices during system reboots. Previously, the driver would always reconfigure the PSE hardware on probe, causing a port matrix reflash that resulted in temporary power loss to all connected devices. This change maintains power continuity by preserving existing configuration when the PSE has been previously initialized. 
Signed-off-by: Kory Maincent Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013-feature_pd692x0_reboot_keep_conf-v2-3-68ab082a93dd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 782b1abf94cb1..134435e900731 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -30,6 +30,8 @@ #define PD692X0_FW_MIN_VER 5 #define PD692X0_FW_PATCH_VER 5 +#define PD692X0_USER_BYTE 42 + enum pd692x0_fw_state { PD692X0_FW_UNKNOWN, PD692X0_FW_OK, @@ -80,6 +82,7 @@ enum { PD692X0_MSG_GET_PORT_PARAM, PD692X0_MSG_GET_POWER_BANK, PD692X0_MSG_SET_POWER_BANK, + PD692X0_MSG_SET_USER_BYTE, /* add new message above here */ PD692X0_MSG_CNT @@ -103,6 +106,7 @@ struct pd692x0_priv { bool last_cmd_key; unsigned long last_cmd_key_time; + bool cfg_saved; enum ethtool_c33_pse_admin_state admin_state[PD692X0_MAX_PIS]; struct regulator_dev *manager_reg[PD692X0_MAX_MANAGERS]; int manager_pw_budget[PD692X0_MAX_MANAGERS]; @@ -193,6 +197,12 @@ static const struct pd692x0_msg pd692x0_msg_template_list[PD692X0_MSG_CNT] = { .key = PD692X0_KEY_CMD, .sub = {0x07, 0x0b, 0x57}, }, + [PD692X0_MSG_SET_USER_BYTE] = { + .key = PD692X0_KEY_PRG, + .sub = {0x41, PD692X0_USER_BYTE}, + .data = {0x4e, 0x4e, 0x4e, 0x4e, + 0x4e, 0x4e, 0x4e, 0x4e}, + }, }; static u8 pd692x0_build_msg(struct pd692x0_msg *msg, u8 echo) @@ -1233,6 +1243,15 @@ static void pd692x0_managers_free_pw_budget(struct pd692x0_priv *priv) } } +static int +pd692x0_save_user_byte(struct pd692x0_priv *priv) +{ + struct pd692x0_msg msg, buf; + + msg = pd692x0_msg_template_list[PD692X0_MSG_SET_USER_BYTE]; + return pd692x0_sendrecv_msg(priv, &msg, &buf); +} + static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) { struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); @@ -1268,9 +1287,16 @@ static int 
pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) if (ret) goto err_managers_req_pw; - ret = pd692x0_hw_conf_init(priv); - if (ret) - goto err_managers_req_pw; + /* Do not init the conf if it is already saved */ + if (!priv->cfg_saved) { + ret = pd692x0_hw_conf_init(priv); + if (ret) + goto err_managers_req_pw; + + ret = pd692x0_save_user_byte(priv); + if (ret) + goto err_managers_req_pw; + } pd692x0_of_put_managers(priv, manager); kfree(manager); @@ -1793,6 +1819,9 @@ static int pd692x0_i2c_probe(struct i2c_client *client) } } + if (buf.data[2] == PD692X0_USER_BYTE) + priv->cfg_saved = true; + priv->np = dev->of_node; priv->pcdev.nr_lines = PD692X0_MAX_PIS; priv->pcdev.owner = THIS_MODULE; From 9fbafbfa5b992187d6e4bc85dd0479eb660b3cc1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 13 Oct 2025 15:58:49 +0200 Subject: [PATCH 038/867] dt-bindings: net: airoha: npu: Add AN7583 support Introduce AN7583 NPU support to Airoha EN7581 NPU device-tree bindings. Acked-by: Rob Herring (Arm) Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251013-airoha-npu-7583-v3-1-00f748b5a0c7@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml b/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml index c7644e6586d32..59c57f58116b5 100644 --- a/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml +++ b/Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml @@ -18,6 +18,7 @@ properties: compatible: enum: - airoha,en7581-npu + - airoha,an7583-npu reg: maxItems: 1 From 0850ae496d534847ec2c26744521c1bce04ec59d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 13 Oct 2025 15:58:50 +0200 Subject: [PATCH 039/867] net: airoha: npu: Add airoha_npu_soc_data struct Introduce airoha_npu_soc_data structure in order to generalize per-SoC NPU firmware info. 
Introduce airoha_npu_load_firmware utility routine. This is a preliminary patch in order to introduce AN7583 NPU support. Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013-airoha-npu-7583-v3-2-00f748b5a0c7@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 77 ++++++++++++++++-------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 8c883f2b2d36b..41944cc5f6b06 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -103,6 +103,16 @@ enum { QDMA_WAN_PON_XDSL, }; +struct airoha_npu_fw { + const char *name; + int max_size; +}; + +struct airoha_npu_soc_data { + struct airoha_npu_fw fw_rv32; + struct airoha_npu_fw fw_data; +}; + #define MBOX_MSG_FUNC_ID GENMASK(14, 11) #define MBOX_MSG_STATIC_BUF BIT(5) #define MBOX_MSG_STATUS GENMASK(4, 2) @@ -182,49 +192,53 @@ static int airoha_npu_send_msg(struct airoha_npu *npu, int func_id, return ret; } -static int airoha_npu_run_firmware(struct device *dev, void __iomem *base, - struct resource *res) +static int airoha_npu_load_firmware(struct device *dev, void __iomem *addr, + const struct airoha_npu_fw *fw_info) { const struct firmware *fw; - void __iomem *addr; int ret; - ret = request_firmware(&fw, NPU_EN7581_FIRMWARE_RV32, dev); + ret = request_firmware(&fw, fw_info->name, dev); if (ret) return ret == -ENOENT ? 
-EPROBE_DEFER : ret; - if (fw->size > NPU_EN7581_FIRMWARE_RV32_MAX_SIZE) { + if (fw->size > fw_info->max_size) { dev_err(dev, "%s: fw size too overlimit (%zu)\n", - NPU_EN7581_FIRMWARE_RV32, fw->size); + fw_info->name, fw->size); ret = -E2BIG; goto out; } - addr = devm_ioremap_resource(dev, res); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto out; - } - memcpy_toio(addr, fw->data, fw->size); +out: release_firmware(fw); - ret = request_firmware(&fw, NPU_EN7581_FIRMWARE_DATA, dev); - if (ret) - return ret == -ENOENT ? -EPROBE_DEFER : ret; + return ret; +} - if (fw->size > NPU_EN7581_FIRMWARE_DATA_MAX_SIZE) { - dev_err(dev, "%s: fw size too overlimit (%zu)\n", - NPU_EN7581_FIRMWARE_DATA, fw->size); - ret = -E2BIG; - goto out; - } +static int airoha_npu_run_firmware(struct device *dev, void __iomem *base, + struct resource *res) +{ + const struct airoha_npu_soc_data *soc; + void __iomem *addr; + int ret; - memcpy_toio(base + REG_NPU_LOCAL_SRAM, fw->data, fw->size); -out: - release_firmware(fw); + soc = of_device_get_match_data(dev); + if (!soc) + return -EINVAL; - return ret; + addr = devm_ioremap_resource(dev, res); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + /* Load rv32 npu firmware */ + ret = airoha_npu_load_firmware(dev, addr, &soc->fw_rv32); + if (ret) + return ret; + + /* Load data npu firmware */ + return airoha_npu_load_firmware(dev, base + REG_NPU_LOCAL_SRAM, + &soc->fw_data); } static irqreturn_t airoha_npu_mbox_handler(int irq, void *npu_instance) @@ -597,8 +611,19 @@ void airoha_npu_put(struct airoha_npu *npu) } EXPORT_SYMBOL_GPL(airoha_npu_put); +static const struct airoha_npu_soc_data en7581_npu_soc_data = { + .fw_rv32 = { + .name = NPU_EN7581_FIRMWARE_RV32, + .max_size = NPU_EN7581_FIRMWARE_RV32_MAX_SIZE, + }, + .fw_data = { + .name = NPU_EN7581_FIRMWARE_DATA, + .max_size = NPU_EN7581_FIRMWARE_DATA_MAX_SIZE, + }, +}; + static const struct of_device_id of_airoha_npu_match[] = { - { .compatible = "airoha,en7581-npu" }, + { .compatible = 
"airoha,en7581-npu", .data = &en7581_npu_soc_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, of_airoha_npu_match); From 4478596f71d92060c9093bdf1d2d940881f41bcc Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 13 Oct 2025 15:58:51 +0200 Subject: [PATCH 040/867] net: airoha: npu: Add 7583 SoC support Introduce support for Airoha 7583 SoC NPU selecting proper firmware images. Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013-airoha-npu-7583-v3-3-00f748b5a0c7@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 41944cc5f6b06..68b7f9684dc7f 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -16,6 +16,8 @@ #define NPU_EN7581_FIRMWARE_DATA "airoha/en7581_npu_data.bin" #define NPU_EN7581_FIRMWARE_RV32 "airoha/en7581_npu_rv32.bin" +#define NPU_AN7583_FIRMWARE_DATA "airoha/an7583_npu_data.bin" +#define NPU_AN7583_FIRMWARE_RV32 "airoha/an7583_npu_rv32.bin" #define NPU_EN7581_FIRMWARE_RV32_MAX_SIZE 0x200000 #define NPU_EN7581_FIRMWARE_DATA_MAX_SIZE 0x10000 #define NPU_DUMP_SIZE 512 @@ -622,8 +624,20 @@ static const struct airoha_npu_soc_data en7581_npu_soc_data = { }, }; +static const struct airoha_npu_soc_data an7583_npu_soc_data = { + .fw_rv32 = { + .name = NPU_AN7583_FIRMWARE_RV32, + .max_size = NPU_EN7581_FIRMWARE_RV32_MAX_SIZE, + }, + .fw_data = { + .name = NPU_AN7583_FIRMWARE_DATA, + .max_size = NPU_EN7581_FIRMWARE_DATA_MAX_SIZE, + }, +}; + static const struct of_device_id of_airoha_npu_match[] = { { .compatible = "airoha,en7581-npu", .data = &en7581_npu_soc_data }, + { .compatible = "airoha,an7583-npu", .data = &an7583_npu_soc_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, of_airoha_npu_match); @@ -762,6 +776,8 @@ module_platform_driver(airoha_npu_driver); 
MODULE_FIRMWARE(NPU_EN7581_FIRMWARE_DATA); MODULE_FIRMWARE(NPU_EN7581_FIRMWARE_RV32); +MODULE_FIRMWARE(NPU_AN7583_FIRMWARE_DATA); +MODULE_FIRMWARE(NPU_AN7583_FIRMWARE_RV32); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Lorenzo Bianconi "); MODULE_DESCRIPTION("Airoha Network Processor Unit driver"); From 4bd451f4c2851eee7b6e17bb6fd6c9caaadbdc18 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Mon, 13 Oct 2025 14:14:48 -0700 Subject: [PATCH 041/867] net: fbnic: Fix page chunking logic when PAGE_SIZE > 4K The HW always works on a 4K page size. When the OS supports larger pages, we fragment them across multiple BDQ descriptors. We were not properly incrementing the descriptor, which resulted in us specifying the last chunk's id/addr and then 15 zero descriptors. This would cause packet loss and driver crashes. This is not a fix since the Kconfig prevents use outside of x86. Signed-off-by: Dimitri Daskalakis Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013211449.1377054-2-dimitri.daskalakis1@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index b1e8ce89870f7..57e18a68f5d23 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -887,6 +887,7 @@ static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem) *bdq_desc = cpu_to_le64(bd); bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) | FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1); + bdq_desc++; } while (--i); } From 75b350839b9e6a0ee73b5a4835a8c61e68851ae8 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Mon, 13 Oct 2025 14:14:49 -0700 Subject: [PATCH 042/867] net: fbnic: Allow builds for all 64 bit architectures This enables aarch64 testing, but there's no reason we cannot support other architectures.
Signed-off-by: Dimitri Daskalakis Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251013211449.1377054-3-dimitri.daskalakis1@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 3ba527514f1e9..dff51f23d295e 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -19,7 +19,7 @@ if NET_VENDOR_META config FBNIC tristate "Meta Platforms Host Network Interface" - depends on X86_64 || COMPILE_TEST + depends on 64BIT || COMPILE_TEST depends on !S390 depends on MAX_SKB_FRAGS < 22 depends on PCI_MSI From 00922eeaca3c5c2001781bcad40e0bd54d0fdbb6 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 13 Oct 2025 16:30:49 -0500 Subject: [PATCH 043/867] dt-bindings: net: Convert amd,xgbe-seattle-v1a to DT schema Convert amd,xgbe-seattle-v1a binding to DT schema format. It's a straight-forward conversion. 
Signed-off-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251013213049.686797-2-robh@kernel.org Signed-off-by: Paolo Abeni --- .../bindings/net/amd,xgbe-seattle-v1a.yaml | 147 ++++++++++++++++++ .../devicetree/bindings/net/amd-xgbe.txt | 76 --------- 2 files changed, 147 insertions(+), 76 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/amd,xgbe-seattle-v1a.yaml delete mode 100644 Documentation/devicetree/bindings/net/amd-xgbe.txt diff --git a/Documentation/devicetree/bindings/net/amd,xgbe-seattle-v1a.yaml b/Documentation/devicetree/bindings/net/amd,xgbe-seattle-v1a.yaml new file mode 100644 index 0000000000000..006add8b6410a --- /dev/null +++ b/Documentation/devicetree/bindings/net/amd,xgbe-seattle-v1a.yaml @@ -0,0 +1,147 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/amd,xgbe-seattle-v1a.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: AMD XGBE Seattle v1a + +maintainers: + - Shyam Sundar S K + +allOf: + - $ref: /schemas/net/ethernet-controller.yaml# + +properties: + compatible: + const: amd,xgbe-seattle-v1a + + reg: + items: + - description: MAC registers + - description: PCS registers + - description: SerDes Rx/Tx registers + - description: SerDes integration registers (1/2) + - description: SerDes integration registers (2/2) + + interrupts: + description: Device interrupts. The first entry is the general device + interrupt. If amd,per-channel-interrupt is specified, each DMA channel + interrupt must be specified. The last entry is the PCS auto-negotiation + interrupt. 
+ minItems: 2 + maxItems: 6 + + clocks: + items: + - description: DMA clock for the device + - description: PTP clock for the device + + clock-names: + items: + - const: dma_clk + - const: ptp_clk + + iommus: + maxItems: 1 + + phy-mode: true + + dma-coherent: true + + amd,per-channel-interrupt: + description: Indicates that Rx and Tx complete will generate a unique + interrupt for each DMA channel. + type: boolean + + amd,speed-set: + description: > + Speed capabilities of the device. + 0 = 1GbE and 10GbE + 1 = 2.5GbE and 10GbE + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + + amd,serdes-blwc: + description: Baseline wandering correction enablement for each speed. + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 3 + maxItems: 3 + items: + enum: [0, 1] + + amd,serdes-cdr-rate: + description: CDR rate speed selection for each speed. + $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: CDR rate for 1GbE + - description: CDR rate for 2.5GbE + - description: CDR rate for 10GbE + + amd,serdes-pq-skew: + description: PQ data sampling skew for each speed. + $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: PQ skew for 1GbE + - description: PQ skew for 2.5GbE + - description: PQ skew for 10GbE + + amd,serdes-tx-amp: + description: TX amplitude boost for each speed. + $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: TX amplitude for 1GbE + - description: TX amplitude for 2.5GbE + - description: TX amplitude for 10GbE + + amd,serdes-dfe-tap-config: + description: DFE taps available to run for each speed. + $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: DFE taps available for 1GbE + - description: DFE taps available for 2.5GbE + - description: DFE taps available for 10GbE + + amd,serdes-dfe-tap-enable: + description: DFE taps to enable for each speed. 
+ $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: DFE taps to enable for 1GbE + - description: DFE taps to enable for 2.5GbE + - description: DFE taps to enable for 10GbE + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - phy-mode + +unevaluatedProperties: false + +examples: + - | + ethernet@e0700000 { + compatible = "amd,xgbe-seattle-v1a"; + reg = <0xe0700000 0x80000>, + <0xe0780000 0x80000>, + <0xe1240800 0x00400>, + <0xe1250000 0x00060>, + <0xe1250080 0x00004>; + interrupts = <0 325 4>, + <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>, + <0 323 4>; + amd,per-channel-interrupt; + clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>; + clock-names = "dma_clk", "ptp_clk"; + phy-mode = "xgmii"; + mac-address = [ 02 a1 a2 a3 a4 a5 ]; + amd,speed-set = <0>; + amd,serdes-blwc = <1>, <1>, <0>; + amd,serdes-cdr-rate = <2>, <2>, <7>; + amd,serdes-pq-skew = <10>, <10>, <30>; + amd,serdes-tx-amp = <15>, <15>, <10>; + amd,serdes-dfe-tap-config = <3>, <3>, <1>; + amd,serdes-dfe-tap-enable = <0>, <0>, <127>; + }; diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt deleted file mode 100644 index 9c27dfcd11334..0000000000000 --- a/Documentation/devicetree/bindings/net/amd-xgbe.txt +++ /dev/null @@ -1,76 +0,0 @@ -* AMD 10GbE driver (amd-xgbe) - -Required properties: -- compatible: Should be "amd,xgbe-seattle-v1a" -- reg: Address and length of the register sets for the device - - MAC registers - - PCS registers - - SerDes Rx/Tx registers - - SerDes integration registers (1/2) - - SerDes integration registers (2/2) -- interrupts: Should contain the amd-xgbe interrupt(s). The first interrupt - listed is required and is the general device interrupt. If the optional - amd,per-channel-interrupt property is specified, then one additional - interrupt for each DMA channel supported by the device should be specified. 
- The last interrupt listed should be the PCS auto-negotiation interrupt. -- clocks: - - DMA clock for the amd-xgbe device (used for calculating the - correct Rx interrupt watchdog timer value on a DMA channel - for coalescing) - - PTP clock for the amd-xgbe device -- clock-names: Should be the names of the clocks - - "dma_clk" for the DMA clock - - "ptp_clk" for the PTP clock -- phy-mode: See ethernet.txt file in the same directory - -Optional properties: -- dma-coherent: Present if dma operations are coherent -- amd,per-channel-interrupt: Indicates that Rx and Tx complete will generate - a unique interrupt for each DMA channel - this requires an additional - interrupt be configured for each DMA channel -- amd,speed-set: Speed capabilities of the device - 0 - 1GbE and 10GbE (default) - 1 - 2.5GbE and 10GbE - -The MAC address will be determined using the optional properties defined in -ethernet.txt. - -The following optional properties are represented by an array with each -value corresponding to a particular speed. The first array value represents -the setting for the 1GbE speed, the second value for the 2.5GbE speed and -the third value for the 10GbE speed. All three values are required if the -property is used. 
-- amd,serdes-blwc: Baseline wandering correction enablement - 0 - Off - 1 - On -- amd,serdes-cdr-rate: CDR rate speed selection -- amd,serdes-pq-skew: PQ (data sampling) skew -- amd,serdes-tx-amp: TX amplitude boost -- amd,serdes-dfe-tap-config: DFE taps available to run -- amd,serdes-dfe-tap-enable: DFE taps to enable - -Example: - xgbe@e0700000 { - compatible = "amd,xgbe-seattle-v1a"; - reg = <0 0xe0700000 0 0x80000>, - <0 0xe0780000 0 0x80000>, - <0 0xe1240800 0 0x00400>, - <0 0xe1250000 0 0x00060>, - <0 0xe1250080 0 0x00004>; - interrupt-parent = <&gic>; - interrupts = <0 325 4>, - <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>, - <0 323 4>; - amd,per-channel-interrupt; - clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>; - clock-names = "dma_clk", "ptp_clk"; - phy-mode = "xgmii"; - mac-address = [ 02 a1 a2 a3 a4 a5 ]; - amd,speed-set = <0>; - amd,serdes-blwc = <1>, <1>, <0>; - amd,serdes-cdr-rate = <2>, <2>, <7>; - amd,serdes-pq-skew = <10>, <10>, <30>; - amd,serdes-tx-amp = <15>, <15>, <10>; - amd,serdes-dfe-tap-config = <3>, <3>, <1>; - amd,serdes-dfe-tap-enable = <0>, <0>, <127>; - }; From af3fce9f1bb41cdca32b24e7f19a902b7fe2906a Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 14 Oct 2025 14:17:24 +0800 Subject: [PATCH 044/867] net: txgbe: expand SW-FW mailbox buffer size to identify QSFP module Recent firmware updates introduce additional fields in the mailbox message to provide more information for identifying 40G and 100G QSFP modules. To accommodate these new fields, expand the mailbox buffer size by 4 bytes. Without this change, drivers built against the updated firmware cannot properly identify modules due to mismatched mailbox message lengths. The old firmware version that used the smaller mailbox buffer has never been publicly released, so there are no backward-compatibility concerns.
Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251014061726.36660-2-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 41915d7dd372a..9d53c7413f7b0 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -352,7 +352,9 @@ struct txgbe_sfp_id { u8 vendor_oui0; /* A0H 0x25 */ u8 vendor_oui1; /* A0H 0x26 */ u8 vendor_oui2; /* A0H 0x27 */ - u8 reserved[3]; + u8 transceiver_type; /* A0H 0x83 */ + u8 sff_opt1; /* A0H 0xC0 */ + u8 reserved[5]; }; struct txgbe_hic_i2c_read { From 1f863ce5c71276710a7689c88bf4003fa5173998 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 14 Oct 2025 14:17:25 +0800 Subject: [PATCH 045/867] net: txgbe: optimize the flow to setup PHY for AML devices To adapt to new firmware for AML devices, the driver should send the "SET_LINK_CMD" to the firmware only once when switching PHY interface mode, and no longer needs to re-trigger PHY configuration based on the RX signal interrupt (TXGBE_GPIOBIT_3). In previous firmware versions, the PHY was configured only after receiving "SET_LINK_CMD", and might remain incomplete if the RX signal was lost. To handle this case, the driver used TXGBE_GPIOBIT_3 interrupt to resend the command. This workaround is no longer necessary with the new firmware. And the unknown link speed is permitted in the mailbox buffer. 
Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20251014061726.36660-3-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/libwx/wx_type.h | 2 - .../net/ethernet/wangxun/txgbe/txgbe_aml.c | 50 ++++++------------- .../net/ethernet/wangxun/txgbe/txgbe_type.h | 1 + 3 files changed, 15 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index d89b9b8a0a2ce..4880268b620e1 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1271,8 +1271,6 @@ struct wx { /* PHY stuff */ bool notify_down; - int adv_speed; - int adv_duplex; unsigned int link; int speed; int duplex; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index dc87ccad96524..1da92431c3242 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -19,8 +19,8 @@ void txgbe_gpio_init_aml(struct wx *wx) { u32 status; - wr32(wx, WX_GPIO_INTTYPE_LEVEL, TXGBE_GPIOBIT_2 | TXGBE_GPIOBIT_3); - wr32(wx, WX_GPIO_INTEN, TXGBE_GPIOBIT_2 | TXGBE_GPIOBIT_3); + wr32(wx, WX_GPIO_INTTYPE_LEVEL, TXGBE_GPIOBIT_2); + wr32(wx, WX_GPIO_INTEN, TXGBE_GPIOBIT_2); status = rd32(wx, WX_GPIO_INTSTATUS); for (int i = 0; i < 6; i++) { @@ -42,11 +42,6 @@ irqreturn_t txgbe_gpio_irq_handler_aml(int irq, void *data) wr32(wx, WX_GPIO_EOI, TXGBE_GPIOBIT_2); wx_service_event_schedule(wx); } - if (status & TXGBE_GPIOBIT_3) { - set_bit(WX_FLAG_NEED_LINK_CONFIG, wx->flags); - wx_service_event_schedule(wx); - wr32(wx, WX_GPIO_EOI, TXGBE_GPIOBIT_3); - } wr32(wx, WX_GPIO_INTMASK, 0); return IRQ_HANDLED; @@ -96,6 +91,9 @@ static int txgbe_set_phy_link_hostif(struct wx *wx, int speed, int autoneg, int case SPEED_10000: buffer.speed = TXGBE_LINK_SPEED_10GB_FULL; break; + default: + buffer.speed = TXGBE_LINK_SPEED_UNKNOWN; + break; } buffer.fec_mode = 
TXGBE_PHY_FEC_AUTO; @@ -106,19 +104,18 @@ static int txgbe_set_phy_link_hostif(struct wx *wx, int speed, int autoneg, int WX_HI_COMMAND_TIMEOUT, true); } -static void txgbe_get_link_capabilities(struct wx *wx) +static void txgbe_get_link_capabilities(struct wx *wx, int *speed, int *duplex) { struct txgbe *txgbe = wx->priv; if (test_bit(PHY_INTERFACE_MODE_25GBASER, txgbe->sfp_interfaces)) - wx->adv_speed = SPEED_25000; + *speed = SPEED_25000; else if (test_bit(PHY_INTERFACE_MODE_10GBASER, txgbe->sfp_interfaces)) - wx->adv_speed = SPEED_10000; + *speed = SPEED_10000; else - wx->adv_speed = SPEED_UNKNOWN; + *speed = SPEED_UNKNOWN; - wx->adv_duplex = wx->adv_speed == SPEED_UNKNOWN ? - DUPLEX_HALF : DUPLEX_FULL; + *duplex = *speed == SPEED_UNKNOWN ? DUPLEX_HALF : DUPLEX_FULL; } static void txgbe_get_phy_link(struct wx *wx, int *speed) @@ -138,23 +135,11 @@ static void txgbe_get_phy_link(struct wx *wx, int *speed) int txgbe_set_phy_link(struct wx *wx) { - int speed, err; - u32 gpio; + int speed, duplex, err; - /* Check RX signal */ - gpio = rd32(wx, WX_GPIO_EXT); - if (gpio & TXGBE_GPIOBIT_3) - return -ENODEV; + txgbe_get_link_capabilities(wx, &speed, &duplex); - txgbe_get_link_capabilities(wx); - if (wx->adv_speed == SPEED_UNKNOWN) - return -ENODEV; - - txgbe_get_phy_link(wx, &speed); - if (speed == wx->adv_speed) - return 0; - - err = txgbe_set_phy_link_hostif(wx, wx->adv_speed, 0, wx->adv_duplex); + err = txgbe_set_phy_link_hostif(wx, speed, 0, duplex); if (err) { wx_err(wx, "Failed to setup link\n"); return err; @@ -230,14 +215,7 @@ int txgbe_identify_sfp(struct wx *wx) return -ENODEV; } - err = txgbe_sfp_to_linkmodes(wx, id); - if (err) - return err; - - if (gpio & TXGBE_GPIOBIT_3) - set_bit(WX_FLAG_NEED_LINK_CONFIG, wx->flags); - - return 0; + return txgbe_sfp_to_linkmodes(wx, id); } void txgbe_setup_link(struct wx *wx) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 9d53c7413f7b0..b9a4ba48f5b99 
100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -314,6 +314,7 @@ void txgbe_up(struct wx *wx); int txgbe_setup_tc(struct net_device *dev, u8 tc); void txgbe_do_reset(struct net_device *netdev); +#define TXGBE_LINK_SPEED_UNKNOWN 0 #define TXGBE_LINK_SPEED_10GB_FULL 4 #define TXGBE_LINK_SPEED_25GB_FULL 0x10 From a058de9262f4d0e58b8ed1d4a05758bbb92cc10e Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 14 Oct 2025 14:17:26 +0800 Subject: [PATCH 046/867] net: txgbe: rename txgbe_get_phy_link() The function txgbe_get_phy_link() is more appropriately named txgbe_get_mac_link(), since it reads the link status from the MAC register. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20251014061726.36660-4-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index 1da92431c3242..35eebdb077616 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -118,7 +118,7 @@ static void txgbe_get_link_capabilities(struct wx *wx, int *speed, int *duplex) *duplex = *speed == SPEED_UNKNOWN ? DUPLEX_HALF : DUPLEX_FULL; } -static void txgbe_get_phy_link(struct wx *wx, int *speed) +static void txgbe_get_mac_link(struct wx *wx, int *speed) { u32 status; @@ -234,7 +234,7 @@ static void txgbe_get_link_state(struct phylink_config *config, struct wx *wx = phylink_to_wx(config); int speed; - txgbe_get_phy_link(wx, &speed); + txgbe_get_mac_link(wx, &speed); state->link = speed != SPEED_UNKNOWN; state->speed = speed; state->duplex = state->link ? 
DUPLEX_FULL : DUPLEX_UNKNOWN; From 4a997d49d92ad9dda603f60881faa6c800d435e9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:54 +0000 Subject: [PATCH 047/867] tcp: Save lock_sock() for memcg in inet_csk_accept(). If memcg is enabled, accept() acquires lock_sock() twice for each new TCP/MPTCP socket in inet_csk_accept() and __inet_accept(). Let's move memcg operations from inet_csk_accept() to __inet_accept(). Note that SCTP somehow allocates a new socket by sk_alloc() in sk->sk_prot->accept() and clones fields manually, instead of using sk_clone_lock(). mem_cgroup_sk_alloc() is called for SCTP before __inet_accept(), so I added the protocol check in __inet_accept(), but this can be removed once SCTP uses sk_clone_lock(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Reviewed-by: Shakeel Butt Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-2-kuniyu@google.com --- net/ipv4/af_inet.c | 22 ++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 25 ------------------------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3109c5ec38f39..e8771faa5bbfd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -755,6 +755,28 @@ EXPORT_SYMBOL(inet_stream_connect); void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk) { + /* TODO: use sk_clone_lock() in SCTP and remove protocol checks */ + if (mem_cgroup_sockets_enabled && + (!IS_ENABLED(CONFIG_IP_SCTP) || sk_is_tcp(newsk))) { + gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL; + + mem_cgroup_sk_alloc(newsk); + + if (mem_cgroup_from_sk(newsk)) { + int amt; + + /* The socket has not been accepted yet, no need + * to look at newsk->sk_wmem_queued. 
+ */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + if (amt) + mem_cgroup_sk_charge(newsk, amt, gfp); + } + + kmem_cache_charge(newsk, gfp); + } + sock_rps_record_flow(newsk); WARN_ON(!((1 << newsk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index cdd1e12aac8c0..3b83b66b2284c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -712,31 +712,6 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg) release_sock(sk); - if (mem_cgroup_sockets_enabled) { - gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL; - int amt = 0; - - /* atomically get the memory usage, set and charge the - * newsk->sk_memcg. - */ - lock_sock(newsk); - - mem_cgroup_sk_alloc(newsk); - if (mem_cgroup_from_sk(newsk)) { - /* The socket has not been accepted yet, no need - * to look at newsk->sk_wmem_queued. - */ - amt = sk_mem_pages(newsk->sk_forward_alloc + - atomic_read(&newsk->sk_rmem_alloc)); - } - - if (amt) - mem_cgroup_sk_charge(newsk, amt, gfp); - kmem_cache_charge(newsk, gfp); - - release_sock(newsk); - } - if (req) reqsk_put(req); From 7c268eaeec6388b7bee36aef3fb5e62c9222ad3b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:55 +0000 Subject: [PATCH 048/867] net: Allow opt-out from global protocol memory accounting. Some protocols (e.g., TCP, UDP) implement memory accounting for socket buffers and charge memory to per-protocol global counters pointed to by sk->sk_proto->memory_allocated. Sometimes, system processes do not want that limitation. For a similar purpose, there is SO_RESERVE_MEM for sockets under memcg. Also, by opting out of the per-protocol accounting, sockets under memcg can avoid paying costs for two orthogonal memory accounting mechanisms. A microbenchmark result is in the subsequent bpf patch. 
Let's allow opt-out from the per-protocol memory accounting if sk->sk_bypass_prot_mem is true. sk->sk_bypass_prot_mem and sk->sk_prot are placed in the same cache line, and sk_has_account() always fetches sk->sk_prot before accessing sk->sk_bypass_prot_mem, so there is no extra cache miss for this patch. The following patches will set sk->sk_bypass_prot_mem to true, and then, the per-protocol memory accounting will be skipped. Note that this does NOT disable memcg, but rather the per-protocol one. Another option not to use the hole in struct sock_common is to create sk_prot variants like tcp_prot_bypass, but this would complicate SOCKMAP logic, tcp_bpf_prots etc. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Reviewed-by: Shakeel Butt Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-3-kuniyu@google.com --- include/net/proto_memory.h | 3 +++ include/net/sock.h | 3 +++ include/net/tcp.h | 3 +++ net/core/sock.c | 32 +++++++++++++++++++++++++------- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_output.c | 7 ++++++- net/mptcp/protocol.c | 7 ++++--- net/tls/tls_device.c | 3 ++- 8 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index 8e91a8fa31b52..ad6d703ce6fe1 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -35,6 +35,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) mem_cgroup_sk_under_memory_pressure(sk)) return true; + if (sk->sk_bypass_prot_mem) + return false; + return !!READ_ONCE(*sk->sk_prot->memory_pressure); } diff --git a/include/net/sock.h b/include/net/sock.h index 30ac2eb4ef9bf..415e7381aa505 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -118,6 +118,7 @@ typedef __u64 __bitwise __addrpair; * @skc_reuseport: %SO_REUSEPORT setting * @skc_ipv6only: socket is IPV6 only * @skc_net_refcnt: socket is using net ref counting + * @skc_bypass_prot_mem: bypass the per-protocol
memory accounting for skb * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol @@ -174,6 +175,7 @@ struct sock_common { unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; unsigned char skc_net_refcnt:1; + unsigned char skc_bypass_prot_mem:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -381,6 +383,7 @@ struct sock { #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only #define sk_net_refcnt __sk_common.skc_net_refcnt +#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e547138f4fb7..439e327fdbfad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -303,6 +303,9 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk) mem_cgroup_sk_under_memory_pressure(sk)) return true; + if (sk->sk_bypass_prot_mem) + return false; + return READ_ONCE(tcp_memory_pressure); } /* diff --git a/net/core/sock.c b/net/core/sock.c index 08ae20069b6d2..5bf208579c02b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1046,9 +1046,13 @@ static int sock_reserve_memory(struct sock *sk, int bytes) if (!charged) return -ENOMEM; + if (sk->sk_bypass_prot_mem) + goto success; + /* pre-charge to forward_alloc */ sk_memory_allocated_add(sk, pages); allocated = sk_memory_allocated(sk); + /* If the system goes into memory pressure with this * precharge, give up and return error. 
*/ @@ -1057,6 +1061,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) mem_cgroup_sk_uncharge(sk, pages); return -ENOMEM; } + +success: sk_forward_alloc_add(sk, pages << PAGE_SHIFT); WRITE_ONCE(sk->sk_reserved_mem, @@ -3145,8 +3151,11 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation))) return true; - sk_enter_memory_pressure(sk); + if (!sk->sk_bypass_prot_mem) + sk_enter_memory_pressure(sk); + sk_stream_moderate_sndbuf(sk); + return false; } EXPORT_SYMBOL(sk_page_frag_refill); @@ -3263,10 +3272,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { bool memcg_enabled = false, charged = false; struct proto *prot = sk->sk_prot; - long allocated; + long allocated = 0; - sk_memory_allocated_add(sk, amt); - allocated = sk_memory_allocated(sk); + if (!sk->sk_bypass_prot_mem) { + sk_memory_allocated_add(sk, amt); + allocated = sk_memory_allocated(sk); + } if (mem_cgroup_sk_enabled(sk)) { memcg_enabled = true; @@ -3275,6 +3286,9 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) goto suppress_allocation; } + if (!allocated) + return 1; + /* Under limit. 
*/ if (allocated <= sk_prot_mem_limits(sk, 0)) { sk_leave_memory_pressure(sk); @@ -3353,7 +3367,8 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) trace_sock_exceed_buf_limit(sk, prot, allocated, kind); - sk_memory_allocated_sub(sk, amt); + if (allocated) + sk_memory_allocated_sub(sk, amt); if (charged) mem_cgroup_sk_uncharge(sk, amt); @@ -3392,11 +3407,14 @@ EXPORT_SYMBOL(__sk_mem_schedule); */ void __sk_mem_reduce_allocated(struct sock *sk, int amount) { - sk_memory_allocated_sub(sk, amount); - if (mem_cgroup_sk_enabled(sk)) mem_cgroup_sk_uncharge(sk, amount); + if (sk->sk_bypass_prot_mem) + return; + + sk_memory_allocated_sub(sk, amount); + if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4d720aa09a4c6..54def27326f1a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -928,7 +928,8 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, } __kfree_skb(skb); } else { - sk->sk_prot->enter_memory_pressure(sk); + if (!sk->sk_bypass_prot_mem) + tcp_enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); } return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b94efb3050d2f..7f5df7a71f629 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3743,12 +3743,17 @@ void sk_forced_mem_schedule(struct sock *sk, int size) delta = size - sk->sk_forward_alloc; if (delta <= 0) return; + amt = sk_mem_pages(delta); sk_forward_alloc_add(sk, amt << PAGE_SHIFT); - sk_memory_allocated_add(sk, amt); if (mem_cgroup_sk_enabled(sk)) mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL); + + if (sk->sk_bypass_prot_mem) + return; + + sk_memory_allocated_add(sk, amt); } /* Send a FIN. The caller locks the socket for us. 
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0292162a14eed..94a5f6dcc5775 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1065,11 +1065,12 @@ static void mptcp_enter_memory_pressure(struct sock *sk) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - if (first) + if (first && !ssk->sk_bypass_prot_mem) { tcp_enter_memory_pressure(ssk); - sk_stream_moderate_sndbuf(ssk); + first = false; + } - first = false; + sk_stream_moderate_sndbuf(ssk); } __mptcp_sync_sndbuf(sk); } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a64ae15b1a60d..caa2b5d246223 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -373,7 +373,8 @@ static int tls_do_allocation(struct sock *sk, if (!offload_ctx->open_record) { if (unlikely(!skb_page_frag_refill(prepend_size, pfrag, sk->sk_allocation))) { - READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk); + if (!sk->sk_bypass_prot_mem) + READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return -ENOMEM; } From b46ab63181ff973ddce44ebc9ac24b269d42f481 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:56 +0000 Subject: [PATCH 049/867] net: Introduce net.core.bypass_prot_mem sysctl. If a socket has sk->sk_bypass_prot_mem flagged, the socket opts out of the global protocol memory accounting. Let's control the flag by a new sysctl knob. The flag is written once during socket(2) and is inherited to child sockets. Tested with a script that creates local socket pairs and send()s a bunch of data without recv()ing. 
Setup: # mkdir /sys/fs/cgroup/test # echo $$ >> /sys/fs/cgroup/test/cgroup.procs # sysctl -q net.ipv4.tcp_mem="1000 1000 1000" # ulimit -n 524288 Without net.core.bypass_prot_mem, charged to tcp_mem & memcg # python3 pressure.py & # cat /sys/fs/cgroup/test/memory.stat | grep sock sock 22642688 <-------------------------------------- charged to memcg # cat /proc/net/sockstat| grep TCP TCP: inuse 2006 orphan 0 tw 0 alloc 2008 mem 5376 <-- charged to tcp_mem # ss -tn | head -n 5 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53188 ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:49972 ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53868 ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53554 # nstat | grep Pressure || echo no pressure TcpExtTCPMemoryPressures 1 0.0 With net.core.bypass_prot_mem=1, charged to memcg only: # sysctl -q net.core.bypass_prot_mem=1 # python3 pressure.py & # cat /sys/fs/cgroup/test/memory.stat | grep sock sock 2757468160 <------------------------------------ charged to memcg # cat /proc/net/sockstat | grep TCP TCP: inuse 2006 orphan 0 tw 0 alloc 2008 mem 0 <- NOT charged to tcp_mem # ss -tn | head -n 5 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 111000 0 127.0.0.1:36019 127.0.0.1:49026 ESTAB 110000 0 127.0.0.1:36019 127.0.0.1:45630 ESTAB 110000 0 127.0.0.1:36019 127.0.0.1:44870 ESTAB 111000 0 127.0.0.1:36019 127.0.0.1:45274 # nstat | grep Pressure || echo no pressure no pressure Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Reviewed-by: Shakeel Butt Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-4-kuniyu@google.com --- Documentation/admin-guide/sysctl/net.rst | 8 ++++++++ include/net/netns/core.h | 1 + net/core/sock.c | 5 +++++ net/core/sysctl_net_core.c | 9 +++++++++ 4 files changed, 23 insertions(+) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 
40749b3cd3569..991773dcb9cfe 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -212,6 +212,14 @@ mem_pcpu_rsv Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU. +bypass_prot_mem +--------------- + +Skip charging socket buffers to the global per-protocol memory +accounting controlled by net.ipv4.tcp_mem, net.ipv4.udp_mem, etc. + +Default: 0 (off) + rmem_default ------------ diff --git a/include/net/netns/core.h b/include/net/netns/core.h index cb9c3e4cd7385..9ef3d70e5e9c0 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -17,6 +17,7 @@ struct netns_core { int sysctl_optmem_max; u8 sysctl_txrehash; u8 sysctl_tstamp_allow_data; + u8 sysctl_bypass_prot_mem; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/net/core/sock.c b/net/core/sock.c index 5bf208579c02b..b78533fb92686 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2306,8 +2306,13 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, * why we need sk_prot_creator -acme */ sk->sk_prot = sk->sk_prot_creator = prot; + + if (READ_ONCE(net->core.sysctl_bypass_prot_mem)) + sk->sk_bypass_prot_mem = 1; + sk->sk_kern_sock = kern; sock_lock_init(sk); + sk->sk_net_refcnt = kern ? 
0 : 1; if (likely(sk->sk_net_refcnt)) { get_net_track(net, &sk->ns_tracker, priority); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f79137826d7f9..8d4decb2606fa 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -683,6 +683,15 @@ static struct ctl_table netns_core_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, + { + .procname = "bypass_prot_mem", + .data = &init_net.core.sysctl_bypass_prot_mem, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, /* sysctl_core_net_init() will set the values after this * to readonly in network namespaces */ From 5ed17896a09e2cc3f07b7d8f6a8b1d6be54550f7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:57 +0000 Subject: [PATCH 050/867] bpf: Support bpf_setsockopt() for BPF_CGROUP_INET_SOCK_CREATE. We will support flagging sk->sk_bypass_prot_mem via bpf_setsockopt() at the BPF_CGROUP_INET_SOCK_CREATE hook. BPF_CGROUP_INET_SOCK_CREATE is invoked by __cgroup_bpf_run_filter_sk() that passes a pointer to struct sock to the bpf prog as void *ctx. But there are no bpf_func_proto for bpf_setsockopt() that receives the ctx as a pointer to struct sock. Also, bpf_getsockopt() will be necessary for a cgroup with multiple bpf progs running. Let's add new bpf_setsockopt() and bpf_getsockopt() variants for BPF_CGROUP_INET_SOCK_CREATE. Note that inet_create() is not under lock_sock() and has the same semantics with bpf_lsm_unlocked_sockopt_hooks. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-5-kuniyu@google.com --- net/core/filter.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 76628df1fc82f..ed3f0e5360595 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5733,6 +5733,40 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_sock_create_setsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +static const struct bpf_func_proto bpf_sock_create_setsockopt_proto = { + .func = bpf_sock_create_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_create_getsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + +static const struct bpf_func_proto bpf_sock_create_getsockopt_proto = { + .func = bpf_sock_create_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { @@ -8062,6 +8096,20 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_cg_sock_proto; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; + case BPF_FUNC_setsockopt: + switch (prog->expected_attach_type) { + case 
BPF_CGROUP_INET_SOCK_CREATE: + return &bpf_sock_create_setsockopt_proto; + default: + return NULL; + } + case BPF_FUNC_getsockopt: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET_SOCK_CREATE: + return &bpf_sock_create_getsockopt_proto; + default: + return NULL; + } default: return bpf_base_func_proto(func_id, prog); } From 38163af068810b388f6723a681dfd8c7b3680d38 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:58 +0000 Subject: [PATCH 051/867] bpf: Introduce SK_BPF_BYPASS_PROT_MEM. If a socket has sk->sk_bypass_prot_mem flagged, the socket opts out of the global protocol memory accounting. This is easily controlled by net.core.bypass_prot_mem sysctl, but it lacks flexibility. Let's support flagging (and clearing) sk->sk_bypass_prot_mem via bpf_setsockopt() at the BPF_CGROUP_INET_SOCK_CREATE hook. int val = 1; bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, &val, sizeof(val)); As with net.core.bypass_prot_mem, this is inherited to child sockets, and BPF always takes precedence over sysctl at socket(2) and accept(2). SK_BPF_BYPASS_PROT_MEM is only supported at BPF_CGROUP_INET_SOCK_CREATE and not supported on other hooks for some reasons: 1. UDP charges memory under sk->sk_receive_queue.lock instead of lock_sock() 2. Modifying the flag after skb is charged to sk requires such adjustment during bpf_setsockopt() and complicates the logic unnecessarily We can support other hooks later if a real use case justifies that. Most changes are inline and hard to trace, but a microbenchmark on __sk_mem_raise_allocated() during neper/tcp_stream showed that more samples completed faster with sk->sk_bypass_prot_mem == 1. This will be more visible under tcp_mem pressure (but it's not a fair comparison). 
# bpftrace -e 'kprobe:__sk_mem_raise_allocated { @start[tid] = nsecs; } kretprobe:__sk_mem_raise_allocated /@start[tid]/ { @end[tid] = nsecs - @start[tid]; @times = hist(@end[tid]); delete(@start[tid]); }' # tcp_stream -6 -F 1000 -N -T 256 Without bpf prog: [128, 256) 3846 | | [256, 512) 1505326 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [512, 1K) 1371006 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [1K, 2K) 198207 |@@@@@@ | [2K, 4K) 31199 |@ | With bpf prog in the next patch: (must be attached before tcp_stream) # bpftool prog load sk_bypass_prot_mem.bpf.o /sys/fs/bpf/test type cgroup/sock_create # bpftool cgroup attach /sys/fs/cgroup/test cgroup_inet_sock_create pinned /sys/fs/bpf/test [128, 256) 6413 | | [256, 512) 1868425 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [512, 1K) 1101697 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [1K, 2K) 117031 |@@@@ | [2K, 4K) 11773 | | Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-6-kuniyu@google.com --- include/uapi/linux/bpf.h | 2 ++ net/core/filter.c | 37 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 40 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6829936d33f58..6eb75ad900b13 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7200,6 +7200,8 @@ enum { TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ + }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index ed3f0e5360595..16105f52927da 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5733,9 +5733,37 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +static int 
sk_bpf_set_get_bypass_prot_mem(struct sock *sk, + char *optval, int optlen, + bool getopt) +{ + int val; + + if (optlen != sizeof(int)) + return -EINVAL; + + if (!sk_has_account(sk)) + return -EOPNOTSUPP; + + if (getopt) { + *(int *)optval = sk->sk_bypass_prot_mem; + return 0; + } + + val = *(int *)optval; + if (val < 0 || val > 1) + return -EINVAL; + + sk->sk_bypass_prot_mem = val; + return 0; +} + BPF_CALL_5(bpf_sock_create_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { + if (level == SOL_SOCKET && optname == SK_BPF_BYPASS_PROT_MEM) + return sk_bpf_set_get_bypass_prot_mem(sk, optval, optlen, false); + return __bpf_setsockopt(sk, level, optname, optval, optlen); } @@ -5753,6 +5781,15 @@ static const struct bpf_func_proto bpf_sock_create_setsockopt_proto = { BPF_CALL_5(bpf_sock_create_getsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { + if (level == SOL_SOCKET && optname == SK_BPF_BYPASS_PROT_MEM) { + int err = sk_bpf_set_get_bypass_prot_mem(sk, optval, optlen, true); + + if (err) + memset(optval, 0, optlen); + + return err; + } + return __bpf_getsockopt(sk, level, optname, optval, optlen); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6829936d33f58..9b17d937edf73 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7200,6 +7200,7 @@ enum { TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ }; enum { From 5f941dd87b0a82dd690821c6e0f427db87a4453b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:59 +0000 Subject: [PATCH 052/867] selftests/bpf: Add test for sk->sk_bypass_prot_mem. 
The test does the following for IPv4/IPv6 x TCP/UDP sockets with/without sk->sk_bypass_prot_mem, which can be turned on by net.core.bypass_prot_mem or bpf_setsockopt(SK_BPF_BYPASS_PROT_MEM). 1. Create socket pairs 2. Send NR_PAGES (32) of data (TCP consumes around 35 pages, and UDP consumes 66 pages due to skb overhead) 3. Read memory_allocated from sk->sk_prot->memory_allocated and sk->sk_prot->memory_per_cpu_fw_alloc 4. Check if unread data is charged to memory_allocated If sk->sk_bypass_prot_mem is set, memory_allocated should not be changed, but we allow a small error (up to 10 pages) in case other processes on the host use some amounts of TCP/UDP memory. The amount of allocated pages is buffered in the per-cpu variable {tcp,udp}_memory_per_cpu_fw_alloc up to +/- net.core.mem_pcpu_rsv before being reported to {tcp,udp}_memory_allocated. At 3., memory_allocated is calculated from the 2 variables at fentry of socket create function. We drain the receive queue only for UDP before close() because UDP recv queue is destroyed after RCU grace period. When I printed memory_allocated, UDP bypass cases sometimes saw the no-bypass case's leftover, but it's still in the small error range (<10 pages). bpf_trace_printk: memory_allocated: 0 <-- TCP no-bypass bpf_trace_printk: memory_allocated: 35 bpf_trace_printk: memory_allocated: 0 <-- TCP w/ sysctl bpf_trace_printk: memory_allocated: 0 bpf_trace_printk: memory_allocated: 0 <-- TCP w/ bpf bpf_trace_printk: memory_allocated: 0 bpf_trace_printk: memory_allocated: 0 <-- UDP no-bypass bpf_trace_printk: memory_allocated: 66 bpf_trace_printk: memory_allocated: 2 <-- UDP w/ sysctl (2 pages leftover) bpf_trace_printk: memory_allocated: 2 bpf_trace_printk: memory_allocated: 2 <-- UDP w/ bpf (2 pages leftover) bpf_trace_printk: memory_allocated: 2 We prefer finishing tests faster than oversleeping for call_rcu() + sk_destruct(). The test completes within 2s on QEMU (64 CPUs) w/ KVM. 
# time ./test_progs -t sk_bypass #371/1 sk_bypass_prot_mem/TCP :OK #371/2 sk_bypass_prot_mem/UDP :OK #371/3 sk_bypass_prot_mem/TCPv6:OK #371/4 sk_bypass_prot_mem/UDPv6:OK #371 sk_bypass_prot_mem:OK Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED real 0m1.481s user 0m0.181s sys 0m0.441s Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-7-kuniyu@google.com --- .../bpf/prog_tests/sk_bypass_prot_mem.c | 292 ++++++++++++++++++ .../selftests/bpf/progs/sk_bypass_prot_mem.c | 104 +++++++ 2 files changed, 396 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c create mode 100644 tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c new file mode 100644 index 0000000000000..e4940583924b1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include +#include "sk_bypass_prot_mem.skel.h" +#include "network_helpers.h" + +#define NR_PAGES 32 +#define NR_SOCKETS 2 +#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS) +#define BUF_SINGLE 1024 +#define NR_SEND (BUF_TOTAL / BUF_SINGLE) + +struct test_case { + char name[8]; + int family; + int type; + int (*create_sockets)(struct test_case *test_case, int sk[], int len); + long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel); +}; + +static int tcp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int server, i, err = 0; + + server = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (!ASSERT_GE(server, 0, "start_server_str")) + return server; + + /* Keep for-loop so we can change NR_SOCKETS easily. 
*/ + for (i = 0; i < len; i += 2) { + sk[i] = connect_to_fd(server, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "connect_to_fd"); + err = sk[i]; + break; + } + + sk[i + 1] = accept(server, NULL, NULL); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "accept"); + err = sk[i + 1]; + break; + } + } + + close(server); + + return err; +} + +static int udp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int i, j, err, rcvbuf = BUF_TOTAL; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "start_server"); + return sk[i]; + } + + sk[i + 1] = connect_to_fd(sk[i], 0); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "connect_to_fd"); + return sk[i + 1]; + } + + err = connect_fd_to_fd(sk[i], sk[i + 1], 0); + if (err) { + ASSERT_EQ(err, 0, "connect_fd_to_fd"); + return err; + } + + for (j = 0; j < 2; j++) { + err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int)); + if (err) { + ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"); + return err; + } + } + } + + return 0; +} + +static long get_memory_allocated(struct test_case *test_case, + bool *activated, long *memory_allocated) +{ + int sk; + + *activated = true; + + /* AF_INET and AF_INET6 share the same memory_allocated. + * tcp_init_sock() is called by AF_INET and AF_INET6, + * but udp_lib_init_sock() is inline. 
+ */ + sk = socket(AF_INET, test_case->type, 0); + if (!ASSERT_GE(sk, 0, "get_memory_allocated")) + return -1; + + close(sk); + + return *memory_allocated; +} + +static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->tcp_activated, + &skel->bss->tcp_memory_allocated); +} + +static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->udp_activated, + &skel->bss->udp_memory_allocated); +} + +static int check_bypass(struct test_case *test_case, + struct sk_bypass_prot_mem *skel, bool bypass) +{ + char buf[BUF_SINGLE] = {}; + long memory_allocated[2]; + int sk[NR_SOCKETS]; + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(sk); i++) + sk[i] = -1; + + err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk)); + if (err) + goto close; + + memory_allocated[0] = test_case->get_memory_allocated(test_case, skel); + + /* allocate pages >= NR_PAGES */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = send(sk[i], buf, sizeof(buf), 0); + + /* Avoid too noisy logs when something failed. */ + if (bytes != sizeof(buf)) { + ASSERT_EQ(bytes, sizeof(buf), "send"); + if (bytes < 0) { + err = bytes; + goto drain; + } + } + } + } + + memory_allocated[1] = test_case->get_memory_allocated(test_case, skel); + + if (bypass) + ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass"); + else + ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass"); + +drain: + if (test_case->type == SOCK_DGRAM) { + /* UDP starts purging sk->sk_receive_queue after one RCU + * grace period, then udp_memory_allocated goes down, + * so drain the queue before close(). 
+ */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC); + + if (bytes == sizeof(buf)) + continue; + if (bytes != -1 || errno != EAGAIN) + PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno)); + break; + } + } + } + +close: + for (i = 0; i < ARRAY_SIZE(sk); i++) { + if (sk[i] < 0) + break; + + close(sk[i]); + } + + return err; +} + +static void run_test(struct test_case *test_case) +{ + struct sk_bypass_prot_mem *skel; + struct nstoken *nstoken; + int cgroup, err; + + skel = sk_bypass_prot_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + skel->bss->nr_cpus = libbpf_num_possible_cpus(); + + err = sk_bypass_prot_mem__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto destroy_skel; + + cgroup = test__join_cgroup("/sk_bypass_prot_mem"); + if (!ASSERT_GE(cgroup, 0, "join_cgroup")) + goto destroy_skel; + + err = make_netns("sk_bypass_prot_mem"); + if (!ASSERT_EQ(err, 0, "make_netns")) + goto close_cgroup; + + nstoken = open_netns("sk_bypass_prot_mem"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto remove_netns; + + err = check_bypass(test_case, skel, false); + if (!ASSERT_EQ(err, 0, "test_bypass(false)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1"); + if (!ASSERT_EQ(err, 0, "write_sysctl(1)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0"); + if (!ASSERT_EQ(err, 0, "write_sysctl(0)")) + goto close_netns; + + skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup); + if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + ASSERT_EQ(err, 0, "test_bypass(true by bpf)"); + +close_netns: + close_netns(nstoken); +remove_netns: + 
remove_netns("sk_bypass_prot_mem"); +close_cgroup: + close(cgroup); +destroy_skel: + sk_bypass_prot_mem__destroy(skel); +} + +static struct test_case test_cases[] = { + { + .name = "TCP ", + .family = AF_INET, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDP ", + .family = AF_INET, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, + { + .name = "TCPv6", + .family = AF_INET6, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDPv6", + .family = AF_INET6, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, +}; + +void serial_test_sk_bypass_prot_mem(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (test__start_subtest(test_cases[i].name)) + run_test(&test_cases[i]); + } +} diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c new file mode 100644 index 0000000000000..09a00d11ffcc4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include "bpf_tracing_net.h" +#include +#include +#include + +extern int tcp_memory_per_cpu_fw_alloc __ksym; +extern int udp_memory_per_cpu_fw_alloc __ksym; + +int nr_cpus; +bool tcp_activated, udp_activated; +long tcp_memory_allocated, udp_memory_allocated; + +struct sk_prot { + long *memory_allocated; + int *memory_per_cpu_fw_alloc; +}; + +static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx) +{ + int *memory_per_cpu_fw_alloc; + + memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i); + if (memory_per_cpu_fw_alloc) + *sk_prot_ctx->memory_allocated += 
*memory_per_cpu_fw_alloc; + + return 0; +} + +static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc) +{ + struct sock *sk = bpf_core_cast(_sk, struct sock); + struct sk_prot sk_prot_ctx; + long memory_allocated; + + /* net_aligned_data.{tcp,udp}_memory_allocated was not available. */ + memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter; + + sk_prot_ctx.memory_allocated = &memory_allocated; + sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc; + + bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0); + + return memory_allocated; +} + +static void fentry_init_sock(struct sock *sk, bool *activated, + long *memory_allocated, int *memory_per_cpu_fw_alloc) +{ + if (!*activated) + return; + + *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc); + *activated = false; +} + +SEC("fentry/tcp_init_sock") +int BPF_PROG(fentry_tcp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &tcp_activated, + &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("fentry/udp_init_sock") +int BPF_PROG(fentry_udp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &udp_activated, + &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("cgroup/sock_create") +int sock_create(struct bpf_sock *ctx) +{ + int err, val = 1; + + err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + val = 0; + + err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + if (val != 1) { + err = -EINVAL; + goto err; + } + + return 1; + +err: + bpf_set_retval(err); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; From bd853a59a87ecf9fc060dd54b10a3a94e9786182 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 14 Oct 2025 19:57:43 -0700 Subject: [PATCH 053/867] net: amd-xgbe: use EOPNOTSUPP instead of ENOTSUPP in xgbe_phy_mii_read_c45 MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MDIO read callback xgbe_phy_mii_read_c45() can propagate its return value up through phylink_mii_ioctl() to user space via netdev ioctls such as SIOCGMIIREG. Returning ENOTSUPP results in user space seeing "Unknown error", since ENOTSUPP is not a standard errno value. Replace ENOTSUPP with EOPNOTSUPP to align with the MDIO core’s usage and ensure user space receives a proper "Operation not supported" error instead of an unknown code. Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Acked-by: Shyam Sundar S K Link: https://patch.msgid.link/20251015025751.1532149-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index a56efc1bee339..35a381a83647d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -668,7 +668,7 @@ static int xgbe_phy_mii_read_c45(struct mii_bus *mii, int addr, int devad, else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) ret = xgbe_phy_mdio_mii_read_c45(pdata, addr, devad, reg); else - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; xgbe_phy_put_comm_ownership(pdata); From 38c31c2620de4e570539a2a461eb62d0e7e692f7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 15 Oct 2025 08:36:49 +0000 Subject: [PATCH 054/867] netdevsim: add ipsec hw_features Currently, netdevsim only sets dev->features, which makes the ESP features fixed. For example: # ethtool -k eni0np1 | grep esp tx-esp-segmentation: on [fixed] esp-hw-offload: on [fixed] esp-tx-csum-hw-offload: on [fixed] This patch adds the ESP features to hw_features, allowing them to be changed manually. 
For example: # ethtool -k eni0np1 | grep esp tx-esp-segmentation: on esp-hw-offload: on esp-tx-csum-hw-offload: on Suggested-by: Sabrina Dubroca Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251015083649.54744-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ipsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index 47cdee5577d46..36a1be4923d61 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c @@ -277,6 +277,7 @@ void nsim_ipsec_init(struct netdevsim *ns) NETIF_F_GSO_ESP) ns->netdev->features |= NSIM_ESP_FEATURES; + ns->netdev->hw_features |= NSIM_ESP_FEATURES; ns->netdev->hw_enc_features |= NSIM_ESP_FEATURES; ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, From e1048520750dd9369ec97554ab308ff1ff932ec6 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Wed, 15 Oct 2025 02:01:17 -0700 Subject: [PATCH 055/867] Octeontx2-af: Fix pci_alloc_irq_vectors() return value check In cgx_probe() when pci_alloc_irq_vectors() fails the error value will be negative and that check is sufficient. err = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX); if (err < 0 || err != nvec) { ... } When pci_alloc_irq_vectors() fails to allocate nvec number of vectors, -ENOSPC is returned, so it would be safe to remove the check that compares err with nvec. 
Suggested-by: Paolo Abeni Signed-off-by: Harshit Mogalapalli Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251015090117.1557870-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index ec0e11c77cbf2..42044cd810b1f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1994,7 +1994,7 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) nvec = pci_msix_vec_count(cgx->pdev); err = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX); - if (err < 0 || err != nvec) { + if (err < 0) { dev_err(dev, "Request for %d msix vectors failed, err %d\n", nvec, err); goto err_release_regions; From f18c231fb12a9662bb9ebd7613e8f71fc497baf1 Mon Sep 17 00:00:00 2001 From: Jan Vaclav Date: Wed, 15 Oct 2025 12:10:02 +0200 Subject: [PATCH 056/867] net/hsr: add interlink to fill_info output Currently, it is possible to configure the interlink port, but no way to read it back from userspace. Add it to the output of hsr_fill_info(), so it can be read from userspace, for example: $ ip -d link show hsr0 12: hsr0: mtu ... ... hsr slave1 veth0 slave2 veth1 interlink veth2 ... 
Signed-off-by: Jan Vaclav Reviewed-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20251015101001.25670-2-jvaclav@redhat.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_netlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 4461adf696234..8511871307552 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -160,6 +160,12 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } + port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK); + if (port) { + if (nla_put_u32(skb, IFLA_HSR_INTERLINK, port->dev->ifindex)) + goto nla_put_failure; + } + if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, hsr->sup_multicast_addr) || nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr)) From 12a7b7bc14273e19a57a49e0ab92e7eb635de8a4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Oct 2025 17:10:41 +0100 Subject: [PATCH 057/867] net: stmmac: dwc-qos-eth: move MDIO bus locking into stmmac_mdio Rather than dwc-qos-eth manipulating the MDIO bus lock directly, add helpers to the stmmac MDIO layer and use them in dwc-qos-eth. This improves my commit 87f43e6f06a2 ("net: stmmac: dwc-qos: calibrate tegra with mdio bus idle"). 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1v945J-0000000AmeJ-1GOb@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index e8539cad4602e..f1c2e35badf72 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -162,7 +162,7 @@ static void tegra_eqos_fix_speed(void *bsp_priv, int speed, unsigned int mode) priv = netdev_priv(dev_get_drvdata(eqos->dev)); /* Calibration should be done with the MDIO bus idle */ - mutex_lock(&priv->mii->mdio_lock); + stmmac_mdio_lock(priv); /* calibrate */ value = readl(eqos->regs + SDMEMCOMPPADCTRL); @@ -198,7 +198,7 @@ static void tegra_eqos_fix_speed(void *bsp_priv, int speed, unsigned int mode) value &= ~SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD; writel(value, eqos->regs + SDMEMCOMPPADCTRL); - mutex_unlock(&priv->mii->mdio_lock); + stmmac_mdio_unlock(priv); } else { value = readl(eqos->regs + AUTO_CAL_CONFIG); value &= ~AUTO_CAL_CONFIG_ENABLE; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 7ca5477be390b..ec8bddc1c37fe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -388,6 +388,8 @@ static inline bool stmmac_wol_enabled_phy(struct stmmac_priv *priv) int stmmac_mdio_unregister(struct net_device *ndev); int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); +void stmmac_mdio_lock(struct stmmac_priv *priv); +void stmmac_mdio_unlock(struct stmmac_priv *priv); int 
stmmac_pcs_setup(struct net_device *ndev); void stmmac_pcs_clean(struct net_device *ndev); void stmmac_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index f408737f6fc73..d62b2870899d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -734,3 +734,17 @@ int stmmac_mdio_unregister(struct net_device *ndev) return 0; } + +void stmmac_mdio_lock(struct stmmac_priv *priv) +{ + if (priv->mii) + mutex_lock(&priv->mii->mdio_lock); +} +EXPORT_SYMBOL_GPL(stmmac_mdio_lock); + +void stmmac_mdio_unlock(struct stmmac_priv *priv) +{ + if (priv->mii) + mutex_unlock(&priv->mii->mdio_lock); +} +EXPORT_SYMBOL_GPL(stmmac_mdio_unlock); From 0bc832a54d274c15c54f2a55995a485e490309b9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Oct 2025 17:10:46 +0100 Subject: [PATCH 058/867] net: stmmac: place .mac_finish() method more appropriately Place the .mac_finish() initialiser and implementation after the .mac_config() initialiser and method which reflects the order that they appear in struct phylink_mac_ops, and the order in which they are called. This keeps logically similar code together. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1v945O-0000000AmeP-1k0t@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 650d75b73e0b0..3728afa701c6e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -859,6 +859,18 @@ static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, /* Nothing to do, xpcs_config() handles everything */ } +static int stmmac_mac_finish(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + if (priv->plat->mac_finish) + priv->plat->mac_finish(ndev, priv->plat->bsp_priv, mode, interface); + + return 0; +} + static void stmmac_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { @@ -1053,27 +1065,15 @@ static int stmmac_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, return 0; } -static int stmmac_mac_finish(struct phylink_config *config, unsigned int mode, - phy_interface_t interface) -{ - struct net_device *ndev = to_net_dev(config->dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - if (priv->plat->mac_finish) - priv->plat->mac_finish(ndev, priv->plat->bsp_priv, mode, interface); - - return 0; -} - static const struct phylink_mac_ops stmmac_phylink_mac_ops = { .mac_get_caps = stmmac_mac_get_caps, .mac_select_pcs = stmmac_mac_select_pcs, .mac_config = stmmac_mac_config, + .mac_finish = stmmac_mac_finish, .mac_link_down = stmmac_mac_link_down, .mac_link_up = stmmac_mac_link_up, .mac_disable_tx_lpi = stmmac_mac_disable_tx_lpi, .mac_enable_tx_lpi 
= stmmac_mac_enable_tx_lpi, - .mac_finish = stmmac_mac_finish, }; /** From e82c64be9b45a1fec556c8fbd35f30ccaf852b6a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Oct 2025 17:10:51 +0100 Subject: [PATCH 059/867] net: stmmac: avoid PHY speed change when configuring MTU There is no need to do the speed-down, speed-up dance when changing the MTU as there is little power saving that can be gained from such a brief interval between these, and the autonegotiation they cause takes much longer. Move the calls to phylink_speed_up() and phylink_speed_down() into stmmac_open() and stmmac_release() respectively, reducing the work done in the __-variants of these functions. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Reviewed-by: Gatien Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1v945T-0000000AmeV-2BvU@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3728afa701c6e..500cfd19e6b5f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3963,8 +3963,6 @@ static int __stmmac_open(struct net_device *dev, stmmac_init_coalesce(priv); phylink_start(priv->phylink); - /* We may have called phylink_speed_down before */ - phylink_speed_up(priv->phylink); ret = stmmac_request_irq(dev); if (ret) @@ -4015,6 +4013,9 @@ static int stmmac_open(struct net_device *dev) kfree(dma_conf); + /* We may have called phylink_speed_down before */ + phylink_speed_up(priv->phylink); + return ret; err_disconnect_phy: @@ -4032,13 +4033,6 @@ static void __stmmac_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; - /* If the PHY or MAC has WoL enabled, then the PHY will not be - * suspended when 
phylink_stop() is called below. Set the PHY - * to its slowest speed to save power. - */ - if (device_may_wakeup(priv->device)) - phylink_speed_down(priv->phylink, false); - /* Stop and disconnect the PHY */ phylink_stop(priv->phylink); @@ -4078,6 +4072,13 @@ static int stmmac_release(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + /* If the PHY or MAC has WoL enabled, then the PHY will not be + * suspended when phylink_stop() is called below. Set the PHY + * to its slowest speed to save power. + */ + if (device_may_wakeup(priv->device)) + phylink_speed_down(priv->phylink, false); + __stmmac_release(dev); phylink_disconnect_phy(priv->phylink); From 07d91ec99a8a37d9b9f6d8ec3f16b97622a74893 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Oct 2025 17:10:56 +0100 Subject: [PATCH 060/867] net: stmmac: rearrange tc_init() To make future changes easier, rearrange the use of dma_cap->l3l4fnum vs priv->flow_entries_max. Always initialise priv->flow_entries_max from dma_cap->l3l4fnum, then use priv->flow_entries_max to determine whether we allocate priv->flow_entries and set it up. This change is safe because tc_init() is only called once from stmmac_dvr_probe(). 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1v945Y-0000000Ameb-2gDI@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 97e89a604abd7..ef65cf511f3e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -262,10 +262,10 @@ static int tc_init(struct stmmac_priv *priv) unsigned int count; int ret, i; - if (dma_cap->l3l4fnum) { - priv->flow_entries_max = dma_cap->l3l4fnum; + priv->flow_entries_max = dma_cap->l3l4fnum; + if (priv->flow_entries_max) { priv->flow_entries = devm_kcalloc(priv->device, - dma_cap->l3l4fnum, + priv->flow_entries_max, sizeof(*priv->flow_entries), GFP_KERNEL); if (!priv->flow_entries) From 4a4094ba7ad2563a96f0482db53d042b695892b0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Oct 2025 17:11:01 +0100 Subject: [PATCH 061/867] net: stmmac: rename stmmac_phy_setup() to include phylink stmmac_phy_setup() does not set up any PHY, but does setup phylink. Rename this function to stmmac_phylink_setup() to reflect more what it is doing. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Reviewed-by: Gatien Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1v945d-0000000Ameh-3Bs7@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 500cfd19e6b5f..c9fa965c85660 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1185,7 +1185,7 @@ static int stmmac_init_phy(struct net_device *dev) return 0; } -static int stmmac_phy_setup(struct stmmac_priv *priv) +static int stmmac_phylink_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data; struct phylink_config *config; @@ -7642,7 +7642,7 @@ int stmmac_dvr_probe(struct device *device, if (ret) goto error_pcs_setup; - ret = stmmac_phy_setup(priv); + ret = stmmac_phylink_setup(priv); if (ret) { netdev_err(ndev, "failed to setup phy (%d)\n", ret); goto error_phy_setup; From 888bd0eca93c8c4ef4510d6b21bc43679e256167 Mon Sep 17 00:00:00 2001 From: Shangjuan Wei Date: Wed, 15 Oct 2025 19:40:41 +0800 Subject: [PATCH 062/867] dt-bindings: ethernet: eswin: Document for EIC7700 SoC Add ESWIN EIC7700 Ethernet controller, supporting clock configuration, delay adjustment and speed adaptive functions. 
Signed-off-by: Zhi Li Signed-off-by: Shangjuan Wei Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20251015114041.1166-1-weishangjuan@eswincomputing.com Signed-off-by: Jakub Kicinski --- .../bindings/net/eswin,eic7700-eth.yaml | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml new file mode 100644 index 0000000000000..9ddbfe219ae2e --- /dev/null +++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/eswin,eic7700-eth.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Eswin EIC7700 SOC Eth Controller + +maintainers: + - Shuang Liang + - Zhi Li + - Shangjuan Wei + +description: + Platform glue layer implementation for STMMAC Ethernet driver. 
+ +select: + properties: + compatible: + contains: + enum: + - eswin,eic7700-qos-eth + required: + - compatible + +allOf: + - $ref: snps,dwmac.yaml# + +properties: + compatible: + items: + - const: eswin,eic7700-qos-eth + - const: snps,dwmac-5.20 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + + clocks: + items: + - description: AXI clock + - description: Configuration clock + - description: GMAC main clock + - description: Tx clock + + clock-names: + items: + - const: axi + - const: cfg + - const: stmmaceth + - const: tx + + resets: + maxItems: 1 + + reset-names: + items: + - const: stmmaceth + + rx-internal-delay-ps: + enum: [0, 200, 600, 1200, 1600, 1800, 2000, 2200, 2400] + + tx-internal-delay-ps: + enum: [0, 200, 600, 1200, 1600, 1800, 2000, 2200, 2400] + + eswin,hsp-sp-csr: + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + - description: Phandle to HSP(High-Speed Peripheral) device + - description: Offset of phy control register for internal + or external clock selection + - description: Offset of AXI clock controller Low-Power request + register + - description: Offset of register controlling TX/RX clock delay + description: | + High-Speed Peripheral device needed to configure clock selection, + clock low-power mode and clock delay. 
+ +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - phy-mode + - resets + - reset-names + - rx-internal-delay-ps + - tx-internal-delay-ps + - eswin,hsp-sp-csr + +unevaluatedProperties: false + +examples: + - | + ethernet@50400000 { + compatible = "eswin,eic7700-qos-eth", "snps,dwmac-5.20"; + reg = <0x50400000 0x10000>; + clocks = <&d0_clock 186>, <&d0_clock 171>, <&d0_clock 40>, + <&d0_clock 193>; + clock-names = "axi", "cfg", "stmmaceth", "tx"; + interrupt-parent = <&plic>; + interrupts = <61>; + interrupt-names = "macirq"; + phy-mode = "rgmii-id"; + phy-handle = <&phy0>; + resets = <&reset 95>; + reset-names = "stmmaceth"; + rx-internal-delay-ps = <200>; + tx-internal-delay-ps = <200>; + eswin,hsp-sp-csr = <&hsp_sp_csr 0x100 0x108 0x118>; + snps,axi-config = <&stmmac_axi_setup>; + snps,aal; + snps,fixed-burst; + snps,tso; + stmmac_axi_setup: stmmac-axi-config { + snps,blen = <0 0 0 0 16 8 4>; + snps,rd_osr_lmt = <2>; + snps,wr_osr_lmt = <2>; + }; + }; From ea77dbbdbc4e0f95ad12d4a9179e1ca785f2cd2c Mon Sep 17 00:00:00 2001 From: Shangjuan Wei Date: Wed, 15 Oct 2025 19:41:01 +0800 Subject: [PATCH 063/867] net: stmmac: add Eswin EIC7700 glue driver Add Ethernet controller support for Eswin's eic7700 SoC. The driver implements hardware initialization, clock configuration, delay adjustment functions based on DWC Ethernet controller, and supports device tree configuration and platform driver integration. 
Signed-off-by: Zhi Li Signed-off-by: Shangjuan Wei Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251015114101.1218-1-weishangjuan@eswincomputing.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 9 + drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../ethernet/stmicro/stmmac/dwmac-eic7700.c | 235 ++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 9507131875b2c..716daa51df7e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -67,6 +67,15 @@ config DWMAC_ANARION This selects the Anarion SoC glue layer support for the stmmac driver. +config DWMAC_EIC7700 + tristate "Support for Eswin eic7700 ethernet driver" + depends on OF && HAS_DMA && ARCH_ESWIN || COMPILE_TEST + help + This driver supports the Eswin EIC7700 Ethernet controller, + which integrates Synopsys DesignWare QoS features. It enables + high-speed networking with DMA acceleration and is optimized + for embedded systems. + config DWMAC_INGENIC tristate "Ingenic MAC support" default MACH_INGENIC diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 51e068e26ce49..ec56bcf2db621 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -14,6 +14,7 @@ stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o # Ordering matters. Generic driver must be last. 
obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o obj-$(CONFIG_DWMAC_ANARION) += dwmac-anarion.o +obj-$(CONFIG_DWMAC_EIC7700) += dwmac-eic7700.o obj-$(CONFIG_DWMAC_INGENIC) += dwmac-ingenic.o obj-$(CONFIG_DWMAC_IPQ806X) += dwmac-ipq806x.o obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c new file mode 100644 index 0000000000000..1dcf2037001ef --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Eswin DWC Ethernet linux driver + * + * Copyright 2025, Beijing ESWIN Computing Technology Co., Ltd. + * + * Authors: + * Zhi Li + * Shuang Liang + * Shangjuan Wei + */ + +#include +#include +#include +#include +#include +#include + +#include "stmmac_platform.h" + +/* eth_phy_ctrl_offset eth0:0x100 */ +#define EIC7700_ETH_TX_CLK_SEL BIT(16) +#define EIC7700_ETH_PHY_INTF_SELI BIT(0) + +/* eth_axi_lp_ctrl_offset eth0:0x108 */ +#define EIC7700_ETH_CSYSREQ_VAL BIT(0) + +/* + * TX/RX Clock Delay Bit Masks: + * - TX Delay: bits [14:8] — TX_CLK delay (unit: 0.1ns per bit) + * - RX Delay: bits [30:24] — RX_CLK delay (unit: 0.1ns per bit) + */ +#define EIC7700_ETH_TX_ADJ_DELAY GENMASK(14, 8) +#define EIC7700_ETH_RX_ADJ_DELAY GENMASK(30, 24) + +#define EIC7700_MAX_DELAY_UNIT 0x7F + +static const char * const eic7700_clk_names[] = { + "tx", "axi", "cfg", +}; + +struct eic7700_qos_priv { + struct plat_stmmacenet_data *plat_dat; +}; + +static int eic7700_clks_config(void *priv, bool enabled) +{ + struct eic7700_qos_priv *dwc = (struct eic7700_qos_priv *)priv; + struct plat_stmmacenet_data *plat = dwc->plat_dat; + int ret = 0; + + if (enabled) + ret = clk_bulk_prepare_enable(plat->num_clks, plat->clks); + else + clk_bulk_disable_unprepare(plat->num_clks, plat->clks); + + return ret; +} + +static int eic7700_dwmac_init(struct platform_device *pdev, void *priv) +{ + struct eic7700_qos_priv *dwc 
= priv; + + return eic7700_clks_config(dwc, true); +} + +static void eic7700_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct eic7700_qos_priv *dwc = priv; + + eic7700_clks_config(dwc, false); +} + +static int eic7700_dwmac_suspend(struct device *dev, void *priv) +{ + return pm_runtime_force_suspend(dev); +} + +static int eic7700_dwmac_resume(struct device *dev, void *priv) +{ + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret) + dev_err(dev, "%s failed: %d\n", __func__, ret); + + return ret; +} + +static int eic7700_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct eic7700_qos_priv *dwc_priv; + struct regmap *eic7700_hsp_regmap; + u32 eth_axi_lp_ctrl_offset; + u32 eth_phy_ctrl_offset; + u32 eth_phy_ctrl_regset; + u32 eth_rxd_dly_offset; + u32 eth_dly_param = 0; + u32 delay_ps; + int i, ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "failed to get resources\n"); + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat), + "dt configuration failed\n"); + + dwc_priv = devm_kzalloc(&pdev->dev, sizeof(*dwc_priv), GFP_KERNEL); + if (!dwc_priv) + return -ENOMEM; + + /* Read rx-internal-delay-ps and update rx_clk delay */ + if (!of_property_read_u32(pdev->dev.of_node, + "rx-internal-delay-ps", &delay_ps)) { + u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT); + + eth_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY; + eth_dly_param |= FIELD_PREP(EIC7700_ETH_RX_ADJ_DELAY, val); + } else { + return dev_err_probe(&pdev->dev, -EINVAL, + "missing required property rx-internal-delay-ps\n"); + } + + /* Read tx-internal-delay-ps and update tx_clk delay */ + if (!of_property_read_u32(pdev->dev.of_node, + "tx-internal-delay-ps", &delay_ps)) { + u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT); + + eth_dly_param &= 
~EIC7700_ETH_TX_ADJ_DELAY; + eth_dly_param |= FIELD_PREP(EIC7700_ETH_TX_ADJ_DELAY, val); + } else { + return dev_err_probe(&pdev->dev, -EINVAL, + "missing required property tx-internal-delay-ps\n"); + } + + eic7700_hsp_regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "eswin,hsp-sp-csr"); + if (IS_ERR(eic7700_hsp_regmap)) + return dev_err_probe(&pdev->dev, + PTR_ERR(eic7700_hsp_regmap), + "Failed to get hsp-sp-csr regmap\n"); + + ret = of_property_read_u32_index(pdev->dev.of_node, + "eswin,hsp-sp-csr", + 1, &eth_phy_ctrl_offset); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "can't get eth_phy_ctrl_offset\n"); + + regmap_read(eic7700_hsp_regmap, eth_phy_ctrl_offset, + &eth_phy_ctrl_regset); + eth_phy_ctrl_regset |= + (EIC7700_ETH_TX_CLK_SEL | EIC7700_ETH_PHY_INTF_SELI); + regmap_write(eic7700_hsp_regmap, eth_phy_ctrl_offset, + eth_phy_ctrl_regset); + + ret = of_property_read_u32_index(pdev->dev.of_node, + "eswin,hsp-sp-csr", + 2, &eth_axi_lp_ctrl_offset); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "can't get eth_axi_lp_ctrl_offset\n"); + + regmap_write(eic7700_hsp_regmap, eth_axi_lp_ctrl_offset, + EIC7700_ETH_CSYSREQ_VAL); + + ret = of_property_read_u32_index(pdev->dev.of_node, + "eswin,hsp-sp-csr", + 3, &eth_rxd_dly_offset); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "can't get eth_rxd_dly_offset\n"); + + regmap_write(eic7700_hsp_regmap, eth_rxd_dly_offset, + eth_dly_param); + + plat_dat->num_clks = ARRAY_SIZE(eic7700_clk_names); + plat_dat->clks = devm_kcalloc(&pdev->dev, + plat_dat->num_clks, + sizeof(*plat_dat->clks), + GFP_KERNEL); + if (!plat_dat->clks) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(eic7700_clk_names); i++) + plat_dat->clks[i].id = eic7700_clk_names[i]; + + ret = devm_clk_bulk_get_optional(&pdev->dev, + plat_dat->num_clks, + plat_dat->clks); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to get clocks\n"); + + plat_dat->clk_tx_i = stmmac_pltfr_find_clk(plat_dat, "tx"); + plat_dat->set_clk_tx_rate =
stmmac_set_clk_tx_rate; + plat_dat->clks_config = eic7700_clks_config; + plat_dat->bsp_priv = dwc_priv; + dwc_priv->plat_dat = plat_dat; + plat_dat->init = eic7700_dwmac_init; + plat_dat->exit = eic7700_dwmac_exit; + plat_dat->suspend = eic7700_dwmac_suspend; + plat_dat->resume = eic7700_dwmac_resume; + + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); +} + +static const struct of_device_id eic7700_dwmac_match[] = { + { .compatible = "eswin,eic7700-qos-eth" }, + { } +}; +MODULE_DEVICE_TABLE(of, eic7700_dwmac_match); + +static struct platform_driver eic7700_dwmac_driver = { + .probe = eic7700_dwmac_probe, + .driver = { + .name = "eic7700-eth-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = eic7700_dwmac_match, + }, +}; +module_platform_driver(eic7700_dwmac_driver); + +MODULE_AUTHOR("Zhi Li "); +MODULE_AUTHOR("Shuang Liang "); +MODULE_AUTHOR("Shangjuan Wei "); +MODULE_DESCRIPTION("Eswin eic7700 qos ethernet driver"); +MODULE_LICENSE("GPL"); From 01b6aca22bb9f8fbbebbf8bdbb80aadf11318e3d Mon Sep 17 00:00:00 2001 From: Sagar Cheluvegowda Date: Wed, 15 Oct 2025 18:26:12 +0200 Subject: [PATCH 064/867] dt-bindings: net: qcom: ethernet: Add interconnect properties Add documentation for the interconnect and interconnect-names properties required when voting for AHB and AXI buses. 
Suggested-by: Andrew Halaney Signed-off-by: Sagar Cheluvegowda Reviewed-by: Andrew Lunn Reviewed-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://patch.msgid.link/20251015-topic-qc_stmmac_icc_bindings-v5-1-da39126cff28@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/qcom,ethqos.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml index e7ee0d9efed83..423959cb928d9 100644 --- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml @@ -73,6 +73,14 @@ properties: dma-coherent: true + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: cpu-mac + - const: mac-mem + phys: true phy-names: From 56cef47c28dc7d40af8959ceb5a3f49e907e2a89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 17:19:02 +0000 Subject: [PATCH 065/867] selftests/net: packetdrill: unflake tcp_user_timeout_user-timeout-probe.pkt This test fails the first time I am running it after a fresh virtme-ng boot. tcp_user_timeout_user-timeout-probe.pkt:33: runtime error in write call: Expected result -1 but got 24 with errno 2 (No such file or directory) Tweaks the timings a bit, to reduce flakiness. 
Signed-off-by: Eric Dumazet Cc: Soham Chakradeo Cc: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251014171907.3554413-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt index 183051ba0cae3..6882b8240a8a9 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -23,14 +23,16 @@ // install a qdisc dropping all packets +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 // When qdisc is congested we retry every 500ms // (TCP_RESOURCE_PROBE_INTERVAL) and therefore // we retry 6 times before hitting 3s timeout. // First verify that the connection is alive: -+3.250 write(4, ..., 24) = 24 ++3 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: - +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) ++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) +0 %{ assert tcpi_probes == 6, tcpi_probes; \ assert tcpi_backoff == 0, tcpi_backoff }% From 5b2b7dec05f3ab80d7bb7fa09acd43222fab6aa2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 17:19:03 +0000 Subject: [PATCH 066/867] net: add indirect call wrapper in skb_release_head_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While stress testing UDP senders on a host with expensive indirect calls, I found cpus processing TX completions were showing a very high cost (20%) in sock_wfree() due to CONFIG_MITIGATION_RETPOLINE=y.
Take care of TCP and UDP TX destructors and use INDIRECT_CALL_3() macro. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251014171907.3554413-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6be01454f262a..8eb3c58207243 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1136,7 +1136,16 @@ void skb_release_head_state(struct sk_buff *skb) skb_dst_drop(skb); if (skb->destructor) { DEBUG_NET_WARN_ON_ONCE(in_hardirq()); - skb->destructor(skb); +#ifdef CONFIG_INET + INDIRECT_CALL_3(skb->destructor, + tcp_wfree, __sock_wfree, sock_wfree, + skb); +#else + INDIRECT_CALL_1(skb->destructor, + sock_wfree, + skb); + +#endif } #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); From fe946a751d9b52b7c45ca34899723b314b79b249 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 17:19:04 +0000 Subject: [PATCH 067/867] net/sched: act_mirred: add loop detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0f022d32c3ec ("net/sched: Fix mirred deadlock on device recursion") added code in the fast path, even when act_mirred is not used. Prepare its revert by implementing loop detection in act_mirred. Adds an array of device pointers in struct netdev_xmit. tcf_mirred_is_act_redirect() can detect if the array already contains the target device. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251014171907.3554413-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice_xmit.h | 9 ++++- net/sched/act_mirred.c | 62 +++++++++++++--------------------- 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h index 813a19122ebbb..cc232508e695e 100644 --- a/include/linux/netdevice_xmit.h +++ b/include/linux/netdevice_xmit.h @@ -2,6 +2,12 @@ #ifndef _LINUX_NETDEVICE_XMIT_H #define _LINUX_NETDEVICE_XMIT_H +#if IS_ENABLED(CONFIG_NET_ACT_MIRRED) +#define MIRRED_NEST_LIMIT 4 +#endif + +struct net_device; + struct netdev_xmit { u16 recursion; u8 more; @@ -9,7 +15,8 @@ struct netdev_xmit { u8 skip_txqueue; #endif #if IS_ENABLED(CONFIG_NET_ACT_MIRRED) - u8 sched_mirred_nest; + u8 sched_mirred_nest; + struct net_device *sched_mirred_dev[MIRRED_NEST_LIMIT]; #endif #if IS_ENABLED(CONFIG_NF_DUP_NETDEV) u8 nf_dup_skb_recursion; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5f01f567c934d..f27b583def78e 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -29,31 +29,6 @@ static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); -#define MIRRED_NEST_LIMIT 4 - -#ifndef CONFIG_PREEMPT_RT -static u8 tcf_mirred_nest_level_inc_return(void) -{ - return __this_cpu_inc_return(softnet_data.xmit.sched_mirred_nest); -} - -static void tcf_mirred_nest_level_dec(void) -{ - __this_cpu_dec(softnet_data.xmit.sched_mirred_nest); -} - -#else -static u8 tcf_mirred_nest_level_inc_return(void) -{ - return current->net_xmit.sched_mirred_nest++; -} - -static void tcf_mirred_nest_level_dec(void) -{ - current->net_xmit.sched_mirred_nest--; -} -#endif - static bool tcf_mirred_is_act_redirect(int action) { return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; @@ -439,44 
+414,53 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); - unsigned int nest_level; + struct netdev_xmit *xmit; bool m_mac_header_xmit; struct net_device *dev; - int m_eaction; + int i, m_eaction; u32 blockid; - nest_level = tcf_mirred_nest_level_inc_return(); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { +#ifdef CONFIG_PREEMPT_RT + xmit = &current->net_xmit; +#else + xmit = this_cpu_ptr(&softnet_data.xmit); +#endif + if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); - retval = TC_ACT_SHOT; - goto dec_nest_level; + return TC_ACT_SHOT; } tcf_lastuse_update(&m->tcf_tm); tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); - if (blockid) { - retval = tcf_blockcast(skb, m, blockid, res, retval); - goto dec_nest_level; - } + if (blockid) + return tcf_blockcast(skb, m, blockid, res, retval); dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); - goto dec_nest_level; + return retval; } + for (i = 0; i < xmit->sched_mirred_nest; i++) { + if (xmit->sched_mirred_dev[i] != dev) + continue; + pr_notice_once("tc mirred: loop on device %s\n", + netdev_name(dev)); + tcf_action_inc_overlimit_qstats(&m->common); + return retval; + } + + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); - -dec_nest_level: - tcf_mirred_nest_level_dec(); + xmit->sched_mirred_nest--; return retval; } From 178ca30889a13b555dddab7689fd2cc58c8e5dac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 17:19:05 +0000 Subject: [PATCH 068/867] Revert "net/sched: Fix mirred deadlock on
device recursion" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commits 0f022d32c3eca477fbf79a205243a6123ed0fe11 and 44180feaccf266d9b0b28cc4ceaac019817deb5c. Prior patch in this series implemented loop detection in act_mirred, we can remove q->owner to save some cycles in the fast path. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Victor Nogueira Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251014171907.3554413-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 1 - net/core/dev.c | 6 ------ net/sched/sch_generic.c | 2 -- 3 files changed, 9 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 738cd5b13c62f..32e9961570b46 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -117,7 +117,6 @@ struct Qdisc { struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; - int owner; unsigned long state; unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; diff --git a/net/core/dev.c b/net/core/dev.c index 35010faf0b787..1d8e7a76d83b6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4167,10 +4167,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } - if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) { - kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP); - return NET_XMIT_DROP; - } /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. 
@@ -4210,9 +4206,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - WRITE_ONCE(q->owner, smp_processor_id()); rc = dev_qdisc_enqueue(skb, q, &to_free, txq); - WRITE_ONCE(q->owner, -1); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1e008a228ebdf..dfa8e8e667d24 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -679,7 +679,6 @@ struct Qdisc noop_qdisc = { .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), }, - .owner = -1, }; EXPORT_SYMBOL(noop_qdisc); @@ -985,7 +984,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - sch->owner = -1; netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); From 526f5fb112f7c89c5a9b8b2f9870c8cb76ca4e42 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 17:19:06 +0000 Subject: [PATCH 069/867] net: sched: claim one cache line in Qdisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace state2 field with a boolean. Move it to a hole between qstats and state so that we shrink Qdisc by a full cache line. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251014171907.3554413-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 32e9961570b46..31561291bc92f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -41,13 +41,6 @@ enum qdisc_state_t { __QDISC_STATE_DRAINING, }; -enum qdisc_state2_t { - /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. - * Use qdisc_run_begin/end() or qdisc_is_running() instead. - */ - __QDISC_STATE2_RUNNING, -}; - #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) #define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) @@ -117,8 +110,8 @@ struct Qdisc { struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; + bool running; /* must be written under qdisc spinlock */ unsigned long state; - unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; @@ -167,7 +160,7 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); - return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + return READ_ONCE(qdisc->running); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) @@ -210,7 +203,10 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) */ return spin_trylock(&qdisc->seqlock); } - return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + if (READ_ONCE(qdisc->running)) + return false; + WRITE_ONCE(qdisc->running, true); + return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) @@ -228,7 +224,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) &qdisc->state))) 
__netif_schedule(qdisc); } else { - __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + WRITE_ONCE(qdisc->running, false); } } From 100dfa74cad9d4665cdcf0cc8e673b123a3ea910 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Oct 2025 17:19:07 +0000 Subject: [PATCH 070/867] net: dev_queue_xmit() llist adoption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove busylock spinlock and use a lockless list (llist) to reduce spinlock contention to the minimum. Idea is that only one cpu might spin on the qdisc spinlock, while others simply add their skb in the llist. After this patch, we get a 300 % improvement on heavy TX workloads. - Sending twice the number of packets per second. - While consuming 50 % less cycles. Note that this also allows in the future to submit batches to various qdisc->enqueue() methods. Tested: - Dual Intel(R) Xeon(R) 6985P-C (480 hyper threads). - 100Gbit NIC, 30 TX queues with FQ packet scheduler. - echo 64 >/sys/kernel/slab/skbuff_small_head/cpu_partial (avoid contention in mm) - 240 concurrent "netperf -t UDP_STREAM -- -m 120 -n" Before: 16 Mpps (41 Mpps if each thread is pinned to a different cpu) vmstat 2 5 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 243 0 0 2368988672 51036 1100852 0 0 146 1 242 60 0 9 91 0 0 244 0 0 2368988672 51036 1100852 0 0 536 10 487745 14718 0 52 48 0 0 244 0 0 2368988672 51036 1100852 0 0 512 0 503067 46033 0 52 48 0 0 244 0 0 2368988672 51036 1100852 0 0 512 0 494807 12107 0 52 48 0 0 244 0 0 2368988672 51036 1100852 0 0 702 26 492845 10110 0 52 48 0 0 Lock contention (1 second sample taken on 8 cores) perf lock record -C0-7 sleep 1; perf lock contention contended total wait max wait avg wait type caller 442111 6.79 s 162.47 ms 15.35 us spinlock dev_hard_start_xmit+0xcd 5961 9.57 ms 8.12 us 1.60 us spinlock __dev_queue_xmit+0x3a0 244 560.63 us 7.63 us 2.30 us spinlock 
do_softirq+0x5b 13 25.09 us 3.21 us 1.93 us spinlock net_tx_action+0xf8 If netperf threads are pinned, spinlock stress is very high. perf lock record -C0-7 sleep 1; perf lock contention contended total wait max wait avg wait type caller 964508 7.10 s 147.25 ms 7.36 us spinlock dev_hard_start_xmit+0xcd 201 268.05 us 4.65 us 1.33 us spinlock __dev_queue_xmit+0x3a0 12 26.05 us 3.84 us 2.17 us spinlock do_softirq+0x5b @__dev_queue_xmit_ns: [256, 512) 21 | | [512, 1K) 631 | | [1K, 2K) 27328 |@ | [2K, 4K) 265392 |@@@@@@@@@@@@@@@@ | [4K, 8K) 417543 |@@@@@@@@@@@@@@@@@@@@@@@@@@ | [8K, 16K) 826292 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [16K, 32K) 733822 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [32K, 64K) 19055 |@ | [64K, 128K) 17240 |@ | [128K, 256K) 25633 |@ | [256K, 512K) 4 | | After: 29 Mpps (57 Mpps if each thread is pinned to a different cpu) vmstat 2 5 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 78 0 0 2369573632 32896 1350988 0 0 22 0 331 254 0 8 92 0 0 75 0 0 2369573632 32896 1350988 0 0 22 50 425713 280199 0 23 76 0 0 104 0 0 2369573632 32896 1350988 0 0 290 0 430238 298247 0 23 76 0 0 86 0 0 2369573632 32896 1350988 0 0 132 0 428019 291865 0 24 76 0 0 90 0 0 2369573632 32896 1350988 0 0 502 0 422498 278672 0 23 76 0 0 perf lock record -C0-7 sleep 1; perf lock contention contended total wait max wait avg wait type caller 2524 116.15 ms 486.61 us 46.02 us spinlock __dev_queue_xmit+0x55b 5821 107.18 ms 371.67 us 18.41 us spinlock dev_hard_start_xmit+0xcd 2377 9.73 ms 35.86 us 4.09 us spinlock ___slab_alloc+0x4e0 923 5.74 ms 20.91 us 6.22 us spinlock ___slab_alloc+0x5c9 121 3.42 ms 193.05 us 28.24 us spinlock net_tx_action+0xf8 6 564.33 us 167.60 us 94.05 us spinlock do_softirq+0x5b If netperf threads are pinned (~54 Mpps) perf lock record -C0-7 sleep 1; perf lock contention 32907 316.98 ms 195.98 us 9.63 us spinlock dev_hard_start_xmit+0xcd 4507 
61.83 ms 212.73 us 13.72 us spinlock __dev_queue_xmit+0x554 2781 23.53 ms 40.03 us 8.46 us spinlock ___slab_alloc+0x5c9 3554 18.94 ms 34.69 us 5.33 us spinlock ___slab_alloc+0x4e0 233 9.09 ms 215.70 us 38.99 us spinlock do_softirq+0x5b 153 930.66 us 48.67 us 6.08 us spinlock net_tx_action+0xfd 84 331.10 us 14.22 us 3.94 us spinlock ___slab_alloc+0x5c9 140 323.71 us 9.94 us 2.31 us spinlock ___slab_alloc+0x4e0 @__dev_queue_xmit_ns: [128, 256) 1539830 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [256, 512) 2299558 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [512, 1K) 483936 |@@@@@@@@@@ | [1K, 2K) 265345 |@@@@@@ | [2K, 4K) 145463 |@@@ | [4K, 8K) 54571 |@ | [8K, 16K) 10270 | | [16K, 32K) 9385 | | [32K, 64K) 7749 | | [64K, 128K) 26799 | | [128K, 256K) 2665 | | [256K, 512K) 665 | | Signed-off-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Kuniyuki Iwashima Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251014171907.3554413-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 4 +- net/core/dev.c | 91 ++++++++++++++++++++++++--------------- net/sched/sch_generic.c | 5 --- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 31561291bc92f..94966692ccdf5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -115,7 +115,9 @@ struct Qdisc { struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; - spinlock_t busylock ____cacheline_aligned_in_smp; + atomic_long_t defer_count ____cacheline_aligned_in_smp; + struct llist_head defer_list; + spinlock_t seqlock; struct rcu_head rcu; diff --git a/net/core/dev.c b/net/core/dev.c index 1d8e7a76d83b6..821e7c7189244 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4125,9 +4125,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) { + struct sk_buff *next, *to_free = NULL; 
spinlock_t *root_lock = qdisc_lock(q); - struct sk_buff *to_free = NULL; - bool contended; + struct llist_node *ll_list, *first_n; + unsigned long defer_count = 0; int rc; qdisc_calculate_pkt_len(skb, q); @@ -4167,61 +4168,81 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } - /* - * Heuristic to force contended enqueues to serialize on a - * separate lock before trying to get qdisc main lock. - * This permits qdisc->running owner to get the lock more - * often and dequeue packets faster. - * On PREEMPT_RT it is possible to preempt the qdisc owner during xmit - * and then other tasks will only enqueue packets. The packets will be - * sent after the qdisc owner is scheduled again. To prevent this - * scenario the task always serialize on the lock. + /* Open code llist_add(&skb->ll_node, &q->defer_list) + queue limit. + * In the try_cmpxchg() loop, we want to increment q->defer_count + * at most once to limit the number of skbs in defer_list. + * We perform the defer_count increment only if the list is not empty, + * because some arches have slow atomic_long_inc_return(). + */ + first_n = READ_ONCE(q->defer_list.first); + do { + if (first_n && !defer_count) { + defer_count = atomic_long_inc_return(&q->defer_count); + if (unlikely(defer_count > q->limit)) { + kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP); + return NET_XMIT_DROP; + } + } + skb->ll_node.next = first_n; + } while (!try_cmpxchg(&q->defer_list.first, &first_n, &skb->ll_node)); + + /* If defer_list was not empty, we know the cpu which queued + * the first skb will process the whole list for us. */ - contended = qdisc_is_running(q) || IS_ENABLED(CONFIG_PREEMPT_RT); - if (unlikely(contended)) - spin_lock(&q->busylock); + if (first_n) + return NET_XMIT_SUCCESS; spin_lock(root_lock); + + ll_list = llist_del_all(&q->defer_list); + /* There is a small race because we clear defer_count not atomically + * with the prior llist_del_all(). 
This means defer_list could grow + * over q->limit. + */ + atomic_long_set(&q->defer_count, 0); + + ll_list = llist_reverse_order(ll_list); + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - __qdisc_drop(skb, &to_free); + llist_for_each_entry_safe(skb, next, ll_list, ll_node) + __qdisc_drop(skb, &to_free); rc = NET_XMIT_DROP; - } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && - qdisc_run_begin(q)) { + goto unlock; + } + if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && + !llist_next(ll_list) && qdisc_run_begin(q)) { /* * This is a work-conserving queue; there are no old skbs * waiting to be sent out; and the qdisc is not running - * xmit the skb directly. */ + DEBUG_NET_WARN_ON_ONCE(skb != llist_entry(ll_list, + struct sk_buff, + ll_node)); qdisc_bstats_update(q, skb); - - if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) { - if (unlikely(contended)) { - spin_unlock(&q->busylock); - contended = false; - } + if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) __qdisc_run(q); - } - qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = dev_qdisc_enqueue(skb, q, &to_free, txq); - if (qdisc_run_begin(q)) { - if (unlikely(contended)) { - spin_unlock(&q->busylock); - contended = false; - } - __qdisc_run(q); - qdisc_run_end(q); + int count = 0; + + llist_for_each_entry_safe(skb, next, ll_list, ll_node) { + prefetch(next); + skb_mark_not_on_list(skb); + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); + count++; } + qdisc_run(q); + if (count != 1) + rc = NET_XMIT_SUCCESS; } +unlock: spin_unlock(root_lock); if (unlikely(to_free)) kfree_skb_list_reason(to_free, tcf_get_drop_reason(to_free)); - if (unlikely(contended)) - spin_unlock(&q->busylock); return rc; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dfa8e8e667d24..d9a98d02a55fc 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -666,7 +666,6 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = 
__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, - .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, .prev = (struct sk_buff *)&noop_qdisc.gso_skb, @@ -970,10 +969,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } } - spin_lock_init(&sch->busylock); - lockdep_set_class(&sch->busylock, - dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); - /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); lockdep_set_class(&sch->seqlock, From f1150b779571da48031d1e643aeaa9d416cfeb60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:02 +0200 Subject: [PATCH 071/867] dt-bindings: net: cdns,macb: sort compatibles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compatibles inside this enum are sorted-ish. Make it sorted. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-1-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/cdns,macb.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index 1029786a855c5..02f14a0b72f9c 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -47,18 +47,18 @@ properties: - const: cdns,macb # Generic - enum: - - atmel,sama5d29-gem # GEM XL IP (10/100) on Atmel sama5d29 SoCs - atmel,sama5d2-gem # GEM IP (10/100) on Atmel sama5d2 SoCs + - atmel,sama5d29-gem # GEM XL IP (10/100) on Atmel sama5d29 SoCs - atmel,sama5d3-gem # Gigabit IP on Atmel sama5d3 SoCs - atmel,sama5d4-gem # GEM IP (10/100) on Atmel sama5d4 SoCs + - cdns,emac # Generic + - cdns,gem # Generic + - cdns,macb # Generic - cdns,np4-macb # NP4 SoC devices - 
microchip,sama7g5-emac # Microchip SAMA7G5 ethernet interface - microchip,sama7g5-gem # Microchip SAMA7G5 gigabit ethernet interface - raspberrypi,rp1-gem # Raspberry Pi RP1 gigabit ethernet interface - sifive,fu540-c000-gem # SiFive FU540-C000 SoC - - cdns,emac # Generic - - cdns,gem # Generic - - cdns,macb # Generic - items: - enum: From a23b0b79e974c921e0d642a8a5f37e76810eac25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:03 +0200 Subject: [PATCH 072/867] net: macb: use BIT() macro for capability definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all capabilities values by calls to the BIT() macro. Reviewed-by: Andrew Lunn Reviewed-by: Sean Anderson Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-2-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 42 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 0830c48973aa0..869d02284707c 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -756,27 +756,27 @@ #define MACB_MAN_C45_CODE 2 /* Capability mask bits */ -#define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x00000001 -#define MACB_CAPS_USRIO_HAS_CLKEN 0x00000002 -#define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII 0x00000004 -#define MACB_CAPS_NO_GIGABIT_HALF 0x00000008 -#define MACB_CAPS_USRIO_DISABLED 0x00000010 -#define MACB_CAPS_JUMBO 0x00000020 -#define MACB_CAPS_GEM_HAS_PTP 0x00000040 -#define MACB_CAPS_BD_RD_PREFETCH 0x00000080 -#define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 -#define MACB_CAPS_MIIONRGMII 0x00000200 -#define MACB_CAPS_NEED_TSUCLK 0x00000400 -#define MACB_CAPS_QUEUE_DISABLE 0x00000800 -#define MACB_CAPS_QBV 0x00001000 -#define MACB_CAPS_PCS 0x01000000 -#define MACB_CAPS_HIGH_SPEED 0x02000000 -#define MACB_CAPS_CLK_HW_CHG 
0x04000000 -#define MACB_CAPS_MACB_IS_EMAC 0x08000000 -#define MACB_CAPS_FIFO_MODE 0x10000000 -#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 -#define MACB_CAPS_SG_DISABLED 0x40000000 -#define MACB_CAPS_MACB_IS_GEM 0x80000000 +#define MACB_CAPS_ISR_CLEAR_ON_WRITE BIT(0) +#define MACB_CAPS_USRIO_HAS_CLKEN BIT(1) +#define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII BIT(2) +#define MACB_CAPS_NO_GIGABIT_HALF BIT(3) +#define MACB_CAPS_USRIO_DISABLED BIT(4) +#define MACB_CAPS_JUMBO BIT(5) +#define MACB_CAPS_GEM_HAS_PTP BIT(6) +#define MACB_CAPS_BD_RD_PREFETCH BIT(7) +#define MACB_CAPS_NEEDS_RSTONUBR BIT(8) +#define MACB_CAPS_MIIONRGMII BIT(9) +#define MACB_CAPS_NEED_TSUCLK BIT(10) +#define MACB_CAPS_QUEUE_DISABLE BIT(11) +#define MACB_CAPS_QBV BIT(12) +#define MACB_CAPS_PCS BIT(24) +#define MACB_CAPS_HIGH_SPEED BIT(25) +#define MACB_CAPS_CLK_HW_CHG BIT(26) +#define MACB_CAPS_MACB_IS_EMAC BIT(27) +#define MACB_CAPS_FIFO_MODE BIT(28) +#define MACB_CAPS_GIGABIT_MODE_AVAILABLE BIT(29) +#define MACB_CAPS_SG_DISABLED BIT(30) +#define MACB_CAPS_MACB_IS_GEM BIT(31) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 From bd0b35ec835a177539922834fd662be6280a65fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:04 +0200 Subject: [PATCH 073/867] net: macb: remove gap in MACB_CAPS_* flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MACB_CAPS_* are bit constants that get used in bp->caps. They occupy bits 0..12 + 24..31. Remove 13..23 gap by moving bits 24..31 to 13..20.
Occupation bitfields (bit 31 leftmost, bit 0 rightmost): -- Before ------------------------------------------------------ 1111 1111 0000 0000 0001 1111 1111 1111 -- After ------------------------------------------------------- 0000 0000 0001 1111 1111 1111 1111 1111 Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-3-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 869d02284707c..9d21ec482c8c6 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -769,14 +769,14 @@ #define MACB_CAPS_NEED_TSUCLK BIT(10) #define MACB_CAPS_QUEUE_DISABLE BIT(11) #define MACB_CAPS_QBV BIT(12) -#define MACB_CAPS_PCS BIT(24) -#define MACB_CAPS_HIGH_SPEED BIT(25) -#define MACB_CAPS_CLK_HW_CHG BIT(26) -#define MACB_CAPS_MACB_IS_EMAC BIT(27) -#define MACB_CAPS_FIFO_MODE BIT(28) -#define MACB_CAPS_GIGABIT_MODE_AVAILABLE BIT(29) -#define MACB_CAPS_SG_DISABLED BIT(30) -#define MACB_CAPS_MACB_IS_GEM BIT(31) +#define MACB_CAPS_PCS BIT(13) +#define MACB_CAPS_HIGH_SPEED BIT(14) +#define MACB_CAPS_CLK_HW_CHG BIT(15) +#define MACB_CAPS_MACB_IS_EMAC BIT(16) +#define MACB_CAPS_FIFO_MODE BIT(17) +#define MACB_CAPS_GIGABIT_MODE_AVAILABLE BIT(18) +#define MACB_CAPS_SG_DISABLED BIT(19) +#define MACB_CAPS_MACB_IS_GEM BIT(20) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 From 80cf78c59a1a9640acf56e22ad18ca17521dea51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:05 +0200 Subject: [PATCH 074/867] net: macb: Remove local variables clk_init and init in macb_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove local variables
clk_init and init. Those function pointers are always equivalent to macb_config->clk_init and macb_config->init. Reviewed-by: Sean Anderson Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-4-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ca2386b834737..dad1188ef9d87 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5424,10 +5424,6 @@ static const struct macb_config default_gem_config = { static int macb_probe(struct platform_device *pdev) { const struct macb_config *macb_config = &default_gem_config; - int (*clk_init)(struct platform_device *, struct clk **, - struct clk **, struct clk **, struct clk **, - struct clk **) = macb_config->clk_init; - int (*init)(struct platform_device *) = macb_config->init; struct device_node *np = pdev->dev.of_node; struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL; struct clk *tsu_clk = NULL; @@ -5449,14 +5445,11 @@ static int macb_probe(struct platform_device *pdev) const struct of_device_id *match; match = of_match_node(macb_dt_ids, np); - if (match && match->data) { + if (match && match->data) macb_config = match->data; - clk_init = macb_config->clk_init; - init = macb_config->init; - } } - err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk, &tsu_clk); + err = macb_config->clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk, &tsu_clk); if (err) return err; @@ -5594,7 +5587,7 @@ static int macb_probe(struct platform_device *pdev) bp->phy_interface = interface; /* IP specific init */ - err = init(pdev); + err = macb_config->init(pdev); if (err) goto err_out_free_netdev; From d7a4a20abe25bb5a15358c8d20a0ab12b7a52f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:06 +0200 
Subject: [PATCH 075/867] net: macb: drop macb_config NULL checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove NULL checks on macb_config as it is always valid: - either it is its default value &default_gem_config, - or it got overridden using match data. Reviewed-by: Sean Anderson Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-5-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dad1188ef9d87..33e99aab1dcb3 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5485,15 +5485,13 @@ static int macb_probe(struct platform_device *pdev) } bp->num_queues = num_queues; bp->queue_mask = queue_mask; - if (macb_config) - bp->dma_burst_length = macb_config->dma_burst_length; + bp->dma_burst_length = macb_config->dma_burst_length; bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; bp->rx_clk = rx_clk; bp->tsu_clk = tsu_clk; - if (macb_config) - bp->jumbo_max_len = macb_config->jumbo_max_len; + bp->jumbo_max_len = macb_config->jumbo_max_len; if (!hw_is_gem(bp->regs, bp->native_io)) bp->max_tx_length = MACB_MAX_TX_LEN; From 94a164598d833aaee98a81b2910ac7bfea09e60f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:07 +0200 Subject: [PATCH 076/867] net: macb: simplify macb_dma_desc_get_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macb_dma_desc_get_size() does a switch on bp->hw_dma_cap and covers all four cases: 0, 64B, PTP, 64B+PTP. It also covers the #ifndef MACB_EXT_DESC separately, making it four codepaths. Instead, notice the descriptor size grows with enabled features and use plain if-statements on 64B and PTP flags. 
Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-6-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 27 ++++++------------------ 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 33e99aab1dcb3..7f74e280a3351 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -121,29 +121,16 @@ struct sifive_fu540_macb_mgmt { */ static unsigned int macb_dma_desc_get_size(struct macb *bp) { + unsigned int desc_size = sizeof(struct macb_dma_desc); + #ifdef MACB_EXT_DESC - unsigned int desc_size; + if (bp->hw_dma_cap & HW_DMA_CAP_64B) + desc_size += sizeof(struct macb_dma_desc_64); + if (bp->hw_dma_cap & HW_DMA_CAP_PTP) + desc_size += sizeof(struct macb_dma_desc_ptp); +#endif - switch (bp->hw_dma_cap) { - case HW_DMA_CAP_64B: - desc_size = sizeof(struct macb_dma_desc) - + sizeof(struct macb_dma_desc_64); - break; - case HW_DMA_CAP_PTP: - desc_size = sizeof(struct macb_dma_desc) - + sizeof(struct macb_dma_desc_ptp); - break; - case HW_DMA_CAP_64B_PTP: - desc_size = sizeof(struct macb_dma_desc) - + sizeof(struct macb_dma_desc_64) - + sizeof(struct macb_dma_desc_ptp); - break; - default: - desc_size = sizeof(struct macb_dma_desc); - } return desc_size; -#endif - return sizeof(struct macb_dma_desc); } static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx) From 62e6c17463a7b361457193b4bf4e87de78534130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:08 +0200 Subject: [PATCH 077/867] net: macb: simplify macb_adj_dma_desc_idx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function body uses a switch statement on bp->hw_dma_cap and handles its four possible values: 0, is_64b, is_ptp, is_64b && is_ptp. 
Instead, refactor by noticing that the return value is: desc_size * MULT with MULT = 3 if is_64b && is_ptp, 2 if is_64b || is_ptp, 1 otherwise. MULT can be expressed as: 1 + is_64b + is_ptp Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-7-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 7f74e280a3351..44a411662786c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -136,19 +136,13 @@ static unsigned int macb_dma_desc_get_size(struct macb *bp) static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx) { #ifdef MACB_EXT_DESC - switch (bp->hw_dma_cap) { - case HW_DMA_CAP_64B: - case HW_DMA_CAP_PTP: - desc_idx <<= 1; - break; - case HW_DMA_CAP_64B_PTP: - desc_idx *= 3; - break; - default: - break; - } -#endif + bool is_ptp = bp->hw_dma_cap & HW_DMA_CAP_PTP; + bool is_64b = bp->hw_dma_cap & HW_DMA_CAP_64B; + + return desc_idx * (1 + is_64b + is_ptp); +#else return desc_idx; +#endif } #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT From 731e991afb75c3b49da1e781e68a7c710e1d9e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:09 +0200 Subject: [PATCH 078/867] net: macb: move bp->hw_dma_cap flags to bp->caps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop bp->hw_dma_cap field and put its two flags into bp->caps. 
On my specific config (eyeq5_defconfig), bloat-o-meter indicates: - macb_main.o: Before=56251, After=56359, chg +0.19% - macb_ptp.o: Before= 3976, After= 3952, chg -0.60% Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-8-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 10 ++------- drivers/net/ethernet/cadence/macb_main.c | 28 ++++++++++++------------ drivers/net/ethernet/cadence/macb_ptp.c | 16 ++++++++------ 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 9d21ec482c8c6..b3a4c9534240a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -777,6 +777,8 @@ #define MACB_CAPS_GIGABIT_MODE_AVAILABLE BIT(18) #define MACB_CAPS_SG_DISABLED BIT(19) #define MACB_CAPS_MACB_IS_GEM BIT(20) +#define MACB_CAPS_DMA_64B BIT(21) +#define MACB_CAPS_DMA_PTP BIT(22) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 @@ -854,11 +856,6 @@ struct macb_dma_desc { }; #ifdef MACB_EXT_DESC -#define HW_DMA_CAP_32B 0 -#define HW_DMA_CAP_64B (1 << 0) -#define HW_DMA_CAP_PTP (1 << 1) -#define HW_DMA_CAP_64B_PTP (HW_DMA_CAP_64B | HW_DMA_CAP_PTP) - struct macb_dma_desc_64 { u32 addrh; u32 resvd; @@ -1349,9 +1346,6 @@ struct macb { struct phy *sgmii_phy; /* for ZynqMP SGMII mode */ -#ifdef MACB_EXT_DESC - uint8_t hw_dma_cap; -#endif spinlock_t tsu_clk_lock; /* gem tsu clock locking */ unsigned int tsu_rate; struct ptp_clock *ptp_clock; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 44a411662786c..44b96bf53ff6b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -124,9 +124,9 @@ static unsigned int macb_dma_desc_get_size(struct macb *bp) unsigned int desc_size = sizeof(struct macb_dma_desc); #ifdef MACB_EXT_DESC - if (bp->hw_dma_cap & HW_DMA_CAP_64B) + if (bp->caps & 
MACB_CAPS_DMA_64B) desc_size += sizeof(struct macb_dma_desc_64); - if (bp->hw_dma_cap & HW_DMA_CAP_PTP) + if (bp->caps & MACB_CAPS_DMA_PTP) desc_size += sizeof(struct macb_dma_desc_ptp); #endif @@ -136,8 +136,8 @@ static unsigned int macb_dma_desc_get_size(struct macb *bp) static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx) { #ifdef MACB_EXT_DESC - bool is_ptp = bp->hw_dma_cap & HW_DMA_CAP_PTP; - bool is_64b = bp->hw_dma_cap & HW_DMA_CAP_64B; + bool is_ptp = bp->caps & MACB_CAPS_DMA_PTP; + bool is_64b = bp->caps & MACB_CAPS_DMA_64B; return desc_idx * (1 + is_64b + is_ptp); #else @@ -475,7 +475,7 @@ static void macb_init_buffers(struct macb *bp) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT /* Single register for all queues' high 32 bits. */ - if (bp->hw_dma_cap & HW_DMA_CAP_64B) { + if (bp->caps & MACB_CAPS_DMA_64B) { macb_writel(bp, RBQPH, upper_32_bits(bp->queues[0].rx_ring_dma)); macb_writel(bp, TBQPH, @@ -1009,7 +1009,7 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT struct macb_dma_desc_64 *desc_64; - if (bp->hw_dma_cap & HW_DMA_CAP_64B) { + if (bp->caps & MACB_CAPS_DMA_64B) { desc_64 = macb_64b_desc(bp, desc); desc_64->addrh = upper_32_bits(addr); /* The low bits of RX address contain the RX_USED bit, clearing @@ -1028,14 +1028,14 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT struct macb_dma_desc_64 *desc_64; - if (bp->hw_dma_cap & HW_DMA_CAP_64B) { + if (bp->caps & MACB_CAPS_DMA_64B) { desc_64 = macb_64b_desc(bp, desc); addr = ((u64)(desc_64->addrh) << 32); } #endif addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); #ifdef CONFIG_MACB_USE_HWSTAMP - if (bp->hw_dma_cap & HW_DMA_CAP_PTP) + if (bp->caps & MACB_CAPS_DMA_PTP) addr &= ~GEM_BIT(DMA_RXVALID); #endif return addr; @@ -2301,7 +2301,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef 
CONFIG_MACB_USE_HWSTAMP if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - (bp->hw_dma_cap & HW_DMA_CAP_PTP)) + (bp->caps & MACB_CAPS_DMA_PTP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; #endif @@ -2781,11 +2781,11 @@ static void macb_configure_dma(struct macb *bp) dmacfg &= ~GEM_BIT(ADDR64); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) + if (bp->caps & MACB_CAPS_DMA_64B) dmacfg |= GEM_BIT(ADDR64); #endif #ifdef CONFIG_MACB_USE_HWSTAMP - if (bp->hw_dma_cap & HW_DMA_CAP_PTP) + if (bp->caps & MACB_CAPS_DMA_PTP) dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT); #endif netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", @@ -3563,7 +3563,7 @@ static int gem_get_ts_info(struct net_device *dev, { struct macb *bp = netdev_priv(dev); - if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) { + if (!(bp->caps & MACB_CAPS_DMA_PTP)) { ethtool_op_get_ts_info(dev, info); return 0; } @@ -4351,7 +4351,7 @@ static void macb_configure_caps(struct macb *bp, "GEM doesn't support hardware ptp.\n"); else { #ifdef CONFIG_MACB_USE_HWSTAMP - bp->hw_dma_cap |= HW_DMA_CAP_PTP; + bp->caps |= MACB_CAPS_DMA_PTP; bp->ptp_info = &gem_ptp_info; #endif } @@ -5518,7 +5518,7 @@ static int macb_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to set DMA mask\n"); goto err_out_free_netdev; } - bp->hw_dma_cap |= HW_DMA_CAP_64B; + bp->caps |= MACB_CAPS_DMA_64B; } #endif platform_set_drvdata(pdev, dev); diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index a63bf29c4fa81..f4ab379f28493 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -28,14 +28,16 @@ static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp, struct macb_dma_desc *desc) { - if (bp->hw_dma_cap == HW_DMA_CAP_PTP) - return (struct macb_dma_desc_ptp *) - ((u8 *)desc + sizeof(struct macb_dma_desc)); - if (bp->hw_dma_cap == HW_DMA_CAP_64B_PTP) + if (!(bp->caps & MACB_CAPS_DMA_PTP)) + return NULL; + + 
if (bp->caps & MACB_CAPS_DMA_64B) return (struct macb_dma_desc_ptp *) ((u8 *)desc + sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64)); - return NULL; + else + return (struct macb_dma_desc_ptp *) + ((u8 *)desc + sizeof(struct macb_dma_desc)); } static int gem_tsu_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts, @@ -380,7 +382,7 @@ int gem_get_hwtst(struct net_device *dev, struct macb *bp = netdev_priv(dev); *tstamp_config = bp->tstamp_config; - if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) + if (!(bp->caps & MACB_CAPS_DMA_PTP)) return -EOPNOTSUPP; return 0; @@ -407,7 +409,7 @@ int gem_set_hwtst(struct net_device *dev, struct macb *bp = netdev_priv(dev); u32 regval; - if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) + if (!(bp->caps & MACB_CAPS_DMA_PTP)) return -EOPNOTSUPP; switch (tstamp_config->tx_type) { From 02d11c610555657f8524a56bab6856fa8d6ace71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:10 +0200 Subject: [PATCH 079/867] net: macb: introduce DMA descriptor helpers (is 64bit? is PTP?) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce macb_dma64() and macb_dma_ptp() helper functions. Many codepaths are made simpler by dropping conditional compilation. This implies two additional changes: - Always compile related structure definitions inside <macb.h>. - MACB_EXT_DESC can be dropped as it is useless now. The common case: #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT struct macb_dma_desc_64 *desc_64; if (bp->hw_dma_cap & HW_DMA_CAP_64B) { desc_64 = macb_64b_desc(bp, desc); // ... } #endif Is replaced by: if (macb_dma64(bp)) { struct macb_dma_desc_64 *desc_64 = macb_64b_desc(bp, desc); // ...
} Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-9-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 18 +++++--- drivers/net/ethernet/cadence/macb_main.c | 55 +++++++----------------- drivers/net/ethernet/cadence/macb_ptp.c | 8 ++-- 3 files changed, 32 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index b3a4c9534240a..f696b2ddc412b 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -15,10 +15,6 @@ #include #include -#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP) -#define MACB_EXT_DESC -#endif - #define MACB_GREGS_NBR 16 #define MACB_GREGS_VERSION 2 #define MACB_MAX_QUEUES 8 @@ -855,7 +851,6 @@ struct macb_dma_desc { u32 ctrl; }; -#ifdef MACB_EXT_DESC struct macb_dma_desc_64 { u32 addrh; u32 resvd; @@ -865,7 +860,6 @@ struct macb_dma_desc_ptp { u32 ts_1; u32 ts_2; }; -#endif /* DMA descriptor bitfields */ #define MACB_RX_USED_OFFSET 0 @@ -1437,6 +1431,18 @@ static inline u64 enst_max_hw_interval(u32 speed_mbps) ENST_TIME_GRANULARITY_NS * 1000, (speed_mbps)); } +static inline bool macb_dma64(struct macb *bp) +{ + return IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && + bp->caps & MACB_CAPS_DMA_64B; +} + +static inline bool macb_dma_ptp(struct macb *bp) +{ + return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) && + bp->caps & MACB_CAPS_DMA_PTP; +} + /** * struct macb_platform_data - platform data for MACB Ethernet used for PCI registration * @pclk: platform clock diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 44b96bf53ff6b..9db419b94d0b4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -123,35 +123,24 @@ static unsigned int macb_dma_desc_get_size(struct macb *bp) { unsigned int desc_size = sizeof(struct macb_dma_desc); -#ifdef MACB_EXT_DESC - if 
(bp->caps & MACB_CAPS_DMA_64B) + if (macb_dma64(bp)) desc_size += sizeof(struct macb_dma_desc_64); - if (bp->caps & MACB_CAPS_DMA_PTP) + if (macb_dma_ptp(bp)) desc_size += sizeof(struct macb_dma_desc_ptp); -#endif return desc_size; } static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx) { -#ifdef MACB_EXT_DESC - bool is_ptp = bp->caps & MACB_CAPS_DMA_PTP; - bool is_64b = bp->caps & MACB_CAPS_DMA_64B; - - return desc_idx * (1 + is_64b + is_ptp); -#else - return desc_idx; -#endif + return desc_idx * (1 + macb_dma64(bp) + macb_dma_ptp(bp)); } -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc) { return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc)); } -#endif /* Ring buffer accessors */ static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index) @@ -473,15 +462,13 @@ static void macb_init_buffers(struct macb *bp) struct macb_queue *queue; unsigned int q; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT /* Single register for all queues' high 32 bits. 
*/ - if (bp->caps & MACB_CAPS_DMA_64B) { + if (macb_dma64(bp)) { macb_writel(bp, RBQPH, upper_32_bits(bp->queues[0].rx_ring_dma)); macb_writel(bp, TBQPH, upper_32_bits(bp->queues[0].tx_ring_dma)); } -#endif for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma)); @@ -1006,10 +993,9 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_t addr) { -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - struct macb_dma_desc_64 *desc_64; + if (macb_dma64(bp)) { + struct macb_dma_desc_64 *desc_64; - if (bp->caps & MACB_CAPS_DMA_64B) { desc_64 = macb_64b_desc(bp, desc); desc_64->addrh = upper_32_bits(addr); /* The low bits of RX address contain the RX_USED bit, clearing @@ -1018,26 +1004,23 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_ */ dma_wmb(); } -#endif + desc->addr = lower_32_bits(addr); } static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc) { dma_addr_t addr = 0; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - struct macb_dma_desc_64 *desc_64; - if (bp->caps & MACB_CAPS_DMA_64B) { + if (macb_dma64(bp)) { + struct macb_dma_desc_64 *desc_64; + desc_64 = macb_64b_desc(bp, desc); addr = ((u64)(desc_64->addrh) << 32); } -#endif addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); -#ifdef CONFIG_MACB_USE_HWSTAMP - if (bp->caps & MACB_CAPS_DMA_PTP) + if (macb_dma_ptp(bp)) addr &= ~GEM_BIT(DMA_RXVALID); -#endif return addr; } @@ -2299,11 +2282,9 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -#ifdef CONFIG_MACB_USE_HWSTAMP - if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - (bp->caps & MACB_CAPS_DMA_PTP)) + if (macb_dma_ptp(bp) && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; -#endif is_lso = (skb_shinfo(skb)->gso_size != 0); @@ -2780,14 
+2761,10 @@ static void macb_configure_dma(struct macb *bp) dmacfg &= ~GEM_BIT(TXCOEN); dmacfg &= ~GEM_BIT(ADDR64); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->caps & MACB_CAPS_DMA_64B) + if (macb_dma64(bp)) dmacfg |= GEM_BIT(ADDR64); -#endif -#ifdef CONFIG_MACB_USE_HWSTAMP - if (bp->caps & MACB_CAPS_DMA_PTP) + if (macb_dma_ptp(bp)) dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT); -#endif netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", dmacfg); gem_writel(bp, DMACFG, dmacfg); @@ -3563,7 +3540,7 @@ static int gem_get_ts_info(struct net_device *dev, { struct macb *bp = netdev_priv(dev); - if (!(bp->caps & MACB_CAPS_DMA_PTP)) { + if (!macb_dma_ptp(bp)) { ethtool_op_get_ts_info(dev, info); return 0; } diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index f4ab379f28493..c9e77819196e1 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -28,10 +28,10 @@ static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp, struct macb_dma_desc *desc) { - if (!(bp->caps & MACB_CAPS_DMA_PTP)) + if (!macb_dma_ptp(bp)) return NULL; - if (bp->caps & MACB_CAPS_DMA_64B) + if (macb_dma64(bp)) return (struct macb_dma_desc_ptp *) ((u8 *)desc + sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64)); @@ -382,7 +382,7 @@ int gem_get_hwtst(struct net_device *dev, struct macb *bp = netdev_priv(dev); *tstamp_config = bp->tstamp_config; - if (!(bp->caps & MACB_CAPS_DMA_PTP)) + if (!macb_dma_ptp(bp)) return -EOPNOTSUPP; return 0; @@ -409,7 +409,7 @@ int gem_set_hwtst(struct net_device *dev, struct macb *bp = netdev_priv(dev); u32 regval; - if (!(bp->caps & MACB_CAPS_DMA_PTP)) + if (!macb_dma_ptp(bp)) return -EOPNOTSUPP; switch (tstamp_config->tx_type) { From 39a913db6a47fd988353cf83f0ccd6a1ee6e2db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:11 +0200 Subject: [PATCH 080/867] net: macb: remove bp->queue_mask MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The low 16 bits of GEM_DCFG6 tell us which queues are enabled in HW. In theory, there could be holes in the bitfield. In practice, the macb driver would fail if there were holes as most loops iterate upon bp->num_queues. Only macb_init() iterated correctly. - Drop bp->queue_mask field. - Error out at probe if a hole is in the queue mask. - Rely upon bp->num_queues for iteration. - As we drop the queue_mask probe local variable, fix RCT. - Compute queue_mask on the fly for TAPRIO using bp->num_queues. Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-10-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 1 - drivers/net/ethernet/cadence/macb_main.c | 69 +++++++++++++----------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index f696b2ddc412b..5b7d4cdb204d8 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1290,7 +1290,6 @@ struct macb { unsigned int tx_ring_size; unsigned int num_queues; - unsigned int queue_mask; struct macb_queue queues[MACB_MAX_QUEUES]; spinlock_t lock; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 9db419b94d0b4..98e28d51a6e12 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4066,6 +4066,8 @@ static int macb_taprio_setup_replace(struct net_device *ndev, struct macb *bp = netdev_priv(ndev); struct ethtool_link_ksettings kset; struct macb_queue *queue; + u32 queue_mask; + u8 queue_id; size_t i; int err; @@ -4117,8 +4119,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev, goto cleanup; } - /* gate_mask must not select queues outside the valid queue_mask */ - if (entry->gate_mask & ~bp->queue_mask) { + /* gate_mask must not select queues outside the valid 
queues */ + queue_id = order_base_2(entry->gate_mask); + if (queue_id >= bp->num_queues) { netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n", i, entry->gate_mask, bp->num_queues); err = -EINVAL; @@ -4152,7 +4155,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev, goto cleanup; } - enst_queue[i].queue_id = order_base_2(entry->gate_mask); + enst_queue[i].queue_id = queue_id; enst_queue[i].start_time_mask = (start_time_sec << GEM_START_TIME_SEC_OFFSET) | start_time_nsec; @@ -4180,8 +4183,9 @@ static int macb_taprio_setup_replace(struct net_device *ndev, /* All validations passed - proceed with hardware configuration */ scoped_guard(spinlock_irqsave, &bp->lock) { /* Disable ENST queues if running before configuring */ + queue_mask = BIT_U32(bp->num_queues) - 1; gem_writel(bp, ENST_CONTROL, - bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET); + queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET); for (i = 0; i < conf->num_entries; i++) { queue = &bp->queues[enst_queue[i].queue_id]; @@ -4210,15 +4214,16 @@ static void macb_taprio_destroy(struct net_device *ndev) { struct macb *bp = netdev_priv(ndev); struct macb_queue *queue; - u32 enst_disable_mask; + u32 queue_mask; unsigned int q; netdev_reset_tc(ndev); - enst_disable_mask = bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET; + queue_mask = BIT_U32(bp->num_queues) - 1; scoped_guard(spinlock_irqsave, &bp->lock) { /* Single disable command for all queues */ - gem_writel(bp, ENST_CONTROL, enst_disable_mask); + gem_writel(bp, ENST_CONTROL, + queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET); /* Clear all queue ENST registers in batch */ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { @@ -4341,26 +4346,25 @@ static void macb_configure_caps(struct macb *bp, dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps); } -static void macb_probe_queues(void __iomem *mem, - bool native_io, - unsigned int *queue_mask, - unsigned int *num_queues) +static int 
macb_probe_queues(struct device *dev, void __iomem *mem, bool native_io) { - *queue_mask = 0x1; - *num_queues = 1; + /* BIT(0) is never set but queue 0 always exists. */ + unsigned int queue_mask = 0x1; - /* is it macb or gem ? - * - * We need to read directly from the hardware here because - * we are early in the probe process and don't have the - * MACB_CAPS_MACB_IS_GEM flag positioned - */ - if (!hw_is_gem(mem, native_io)) - return; + /* Use hw_is_gem() as MACB_CAPS_MACB_IS_GEM is not yet positioned. */ + if (hw_is_gem(mem, native_io)) { + if (native_io) + queue_mask |= __raw_readl(mem + GEM_DCFG6) & 0xFF; + else + queue_mask |= readl_relaxed(mem + GEM_DCFG6) & 0xFF; + + if (fls(queue_mask) != ffz(queue_mask)) { + dev_err(dev, "queue mask %#x has a hole\n", queue_mask); + return -EINVAL; + } + } - /* bit 0 is never set but queue 0 always exists */ - *queue_mask |= readl_relaxed(mem + GEM_DCFG6) & 0xff; - *num_queues = hweight32(*queue_mask); + return hweight32(queue_mask); } static void macb_clks_disable(struct clk *pclk, struct clk *hclk, struct clk *tx_clk, @@ -4478,10 +4482,7 @@ static int macb_init(struct platform_device *pdev) * register mapping but we don't want to test the queue index then * compute the corresponding register offset at run time. 
*/ - for (hw_q = 0, q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) { - if (!(bp->queue_mask & (1 << hw_q))) - continue; - + for (hw_q = 0, q = 0; hw_q < bp->num_queues; ++hw_q) { queue = &bp->queues[q]; queue->bp = bp; spin_lock_init(&queue->tx_ptr_lock); @@ -5385,14 +5386,14 @@ static int macb_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL; struct clk *tsu_clk = NULL; - unsigned int queue_mask, num_queues; - bool native_io; phy_interface_t interface; struct net_device *dev; struct resource *regs; u32 wtrmrk_rst_val; void __iomem *mem; struct macb *bp; + int num_queues; + bool native_io; int err, val; mem = devm_platform_get_and_ioremap_resource(pdev, 0, &regs); @@ -5418,7 +5419,12 @@ static int macb_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); native_io = hw_is_native_io(mem); - macb_probe_queues(mem, native_io, &queue_mask, &num_queues); + num_queues = macb_probe_queues(&pdev->dev, mem, native_io); + if (num_queues < 0) { + err = num_queues; + goto err_disable_clocks; + } + dev = alloc_etherdev_mq(sizeof(*bp), num_queues); if (!dev) { err = -ENOMEM; @@ -5442,7 +5448,6 @@ static int macb_probe(struct platform_device *pdev) bp->macb_reg_writel = hw_writel; } bp->num_queues = num_queues; - bp->queue_mask = queue_mask; bp->dma_burst_length = macb_config->dma_burst_length; bp->pclk = pclk; bp->hclk = hclk; From f26c6438a285157e24973b4188d75d4674def46c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:12 +0200 Subject: [PATCH 081/867] net: macb: replace min() with umin() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever min(a, b) is used with a and b unsigned variables or literals, `make W=2` complains. Change four min() calls into umin().
stderr extract (GCC 11.2.0, MIPS Codescape): ./include/linux/minmax.h:68:57: warning: comparison is always true due to limited range of data type [-Wtype-limits] 68 | #define __is_nonneg(ux) statically_true((long long)(ux) >= 0) | ^~ drivers/net/ethernet/cadence/macb_main.c:2299:26: note: in expansion of macro ‘min’ 2299 | hdrlen = min(skb_headlen(skb), bp->max_tx_length); | ^~~ Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-11-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 98e28d51a6e12..6c6bc6aa23c71 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2035,7 +2035,7 @@ static unsigned int macb_tx_map(struct macb *bp, count++; tx_head++; - size = min(len, bp->max_tx_length); + size = umin(len, bp->max_tx_length); } /* Then, map paged data from fragments */ @@ -2045,7 +2045,7 @@ static unsigned int macb_tx_map(struct macb *bp, len = skb_frag_size(frag); offset = 0; while (len) { - size = min(len, bp->max_tx_length); + size = umin(len, bp->max_tx_length); entry = macb_tx_ring_wrap(bp, tx_head); tx_skb = &queue->tx_skb[entry]; @@ -2301,7 +2301,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } } else - hdrlen = min(skb_headlen(skb), bp->max_tx_length); + hdrlen = umin(skb_headlen(skb), bp->max_tx_length); #if defined(DEBUG) && defined(VERBOSE_DEBUG) netdev_vdbg(bp->dev, @@ -4573,8 +4573,8 @@ static int macb_init(struct platform_device *pdev) * each 4-tuple define requires 1 T2 screener reg + 3 compare regs */ reg = gem_readl(bp, DCFG8); - bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3), - GEM_BFEXT(T2SCR, reg)); + bp->max_tuples = umin((GEM_BFEXT(SCR2CMP, reg) / 3), + GEM_BFEXT(T2SCR, reg)); 
INIT_LIST_HEAD(&bp->rx_fs_list.list); if (bp->max_tuples > 0) { /* also needs one ethtype match to check IPv4 */ From 027202adf07952d9b4d7a3199fe1014a537e7a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:13 +0200 Subject: [PATCH 082/867] net: macb: drop `entry` local variable in macb_tx_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pattern: entry = macb_tx_ring_wrap(bp, i); tx_skb = &queue->tx_skb[entry]; is the exact definition of: macb_tx_skb(queue, i); The pattern: entry = macb_tx_ring_wrap(bp, i); desc = macb_tx_desc(queue, entry); is redundant because macb_tx_desc() calls macb_tx_ring_wrap(). One explicit call to macb_tx_ring_wrap() is still required for checking if it is the last buffer (TX_WRAP case). Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-12-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6c6bc6aa23c71..08e541d8f8e68 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1989,7 +1989,7 @@ static unsigned int macb_tx_map(struct macb *bp, unsigned int hdrlen) { dma_addr_t mapping; - unsigned int len, entry, i, tx_head = queue->tx_head; + unsigned int len, i, tx_head = queue->tx_head; struct macb_tx_skb *tx_skb = NULL; struct macb_dma_desc *desc; unsigned int offset, size, count = 0; @@ -2015,8 +2015,7 @@ static unsigned int macb_tx_map(struct macb *bp, offset = 0; while (len) { - entry = macb_tx_ring_wrap(bp, tx_head); - tx_skb = &queue->tx_skb[entry]; + tx_skb = macb_tx_skb(queue, tx_head); mapping = dma_map_single(&bp->pdev->dev, skb->data + offset, @@ -2046,8 +2045,7 @@ static unsigned int macb_tx_map(struct macb *bp, offset = 0; while (len) 
{ size = umin(len, bp->max_tx_length); - entry = macb_tx_ring_wrap(bp, tx_head); - tx_skb = &queue->tx_skb[entry]; + tx_skb = macb_tx_skb(queue, tx_head); mapping = skb_frag_dma_map(&bp->pdev->dev, frag, offset, size, DMA_TO_DEVICE); @@ -2084,9 +2082,8 @@ static unsigned int macb_tx_map(struct macb *bp, * to set the end of TX queue */ i = tx_head; - entry = macb_tx_ring_wrap(bp, i); ctrl = MACB_BIT(TX_USED); - desc = macb_tx_desc(queue, entry); + desc = macb_tx_desc(queue, i); desc->ctrl = ctrl; if (lso_ctrl) { @@ -2106,16 +2103,15 @@ static unsigned int macb_tx_map(struct macb *bp, do { i--; - entry = macb_tx_ring_wrap(bp, i); - tx_skb = &queue->tx_skb[entry]; - desc = macb_tx_desc(queue, entry); + tx_skb = macb_tx_skb(queue, i); + desc = macb_tx_desc(queue, i); ctrl = (u32)tx_skb->size; if (eof) { ctrl |= MACB_BIT(TX_LAST); eof = 0; } - if (unlikely(entry == (bp->tx_ring_size - 1))) + if (unlikely(macb_tx_ring_wrap(bp, i) == bp->tx_ring_size - 1)) ctrl |= MACB_BIT(TX_WRAP); /* First descriptor is header descriptor */ From b5fe4f3e5912d79832bc37b207234aaf84c1b789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:14 +0200 Subject: [PATCH 083/867] net: macb: drop `count` local variable in macb_tx_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local variable `count` is useless: it counts number of DMA descriptors used and returns it. But the return value is only checked for error. Drop counting the number of DMA descriptors and return a usual negative-if-error integer. 
Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-13-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 08e541d8f8e68..dd3b13fa30471 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1992,7 +1992,7 @@ static unsigned int macb_tx_map(struct macb *bp, unsigned int len, i, tx_head = queue->tx_head; struct macb_tx_skb *tx_skb = NULL; struct macb_dma_desc *desc; - unsigned int offset, size, count = 0; + unsigned int offset, size; unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags; unsigned int eof = 1, mss_mfs = 0; u32 ctrl, lso_ctrl = 0, seq_ctrl = 0; @@ -2031,7 +2031,6 @@ static unsigned int macb_tx_map(struct macb *bp, len -= size; offset += size; - count++; tx_head++; size = umin(len, bp->max_tx_length); @@ -2060,7 +2059,6 @@ static unsigned int macb_tx_map(struct macb *bp, len -= size; offset += size; - count++; tx_head++; } } @@ -2139,7 +2137,7 @@ static unsigned int macb_tx_map(struct macb *bp, queue->tx_head = tx_head; - return count; + return 0; dma_error: netdev_err(bp->dev, "TX DMA map failed\n"); @@ -2150,7 +2148,7 @@ static unsigned int macb_tx_map(struct macb *bp, macb_tx_unmap(bp, tx_skb, 0); } - return 0; + return -ENOMEM; } static netdev_features_t macb_features_check(struct sk_buff *skb, @@ -2336,7 +2334,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) } /* Map socket buffer for DMA transfer */ - if (!macb_tx_map(bp, queue, skb, hdrlen)) { + if (macb_tx_map(bp, queue, skb, hdrlen)) { dev_kfree_skb_any(skb); goto unlock; } From 1ce9662e31fdae96df543b22ecbc1b48f6db753a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:15 +0200 Subject: [PATCH 084/867] net: macb: apply reverse 
christmas tree in macb_tx_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arguments grew over time; follow conventions and apply reverse christmas tree (RCT). Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-14-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dd3b13fa30471..e15fcdd43d778 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1988,14 +1988,14 @@ static unsigned int macb_tx_map(struct macb *bp, struct sk_buff *skb, unsigned int hdrlen) { - dma_addr_t mapping; + unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags; unsigned int len, i, tx_head = queue->tx_head; + u32 ctrl, lso_ctrl = 0, seq_ctrl = 0; + unsigned int eof = 1, mss_mfs = 0; struct macb_tx_skb *tx_skb = NULL; struct macb_dma_desc *desc; unsigned int offset, size; - unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags; - unsigned int eof = 1, mss_mfs = 0; - u32 ctrl, lso_ctrl = 0, seq_ctrl = 0; + dma_addr_t mapping; /* LSO */ if (skb_shinfo(skb)->gso_size != 0) { From 8ebeef3d01c8b9e5807afdf1d38547f4625d0e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Tue, 14 Oct 2025 17:25:16 +0200 Subject: [PATCH 085/867] net: macb: sort #includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort #include preprocessor directives. 
Reviewed-by: Andrew Lunn Reviewed-by: Sean Anderson Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251014-macb-cleanup-v1-15-31cd266e22cd@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 37 ++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index e15fcdd43d778..214f543af3b8f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -6,36 +6,37 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include #include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include +#include +#include #include #include #include -#include -#include +#include +#include +#include +#include #include #include "macb.h" From 9271d0ea07c27e6f482f20b615c2a4bba991e68c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 11:38:07 +0200 Subject: [PATCH 086/867] can: m_can: add support for optional reset This patch has been split from the original series [1]. In some SoCs (observed on the STM32MP15) the M_CAN IP core keeps the CAN state and CAN error counters over an internal reset cycle. The STM32MP15 SoC provides an external reset, which is shared between both M_CAN cores. Add support for an optional external reset. Take care of shared resets, de-assert reset during the probe phase in m_can_class_register() and while the interface is up, assert the reset otherwise. 
[1] https://lore.kernel.org/all/20250923-m_can-fix-state-handling-v3-0-06d8baccadbf@pengutronix.de Reviewed-by: Philipp Zabel Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20251008-m_can-add-reset-v1-1-49f0bbf820c4@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 27 ++++++++++++++++++++++++--- drivers/net/can/m_can/m_can.h | 1 + 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ad4f577c1ef78..48b7a67336b5e 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "m_can.h" @@ -1827,6 +1828,7 @@ static int m_can_close(struct net_device *dev) close_candev(dev); + reset_control_assert(cdev->rst); m_can_clk_stop(cdev); phy_power_off(cdev->transceiver); @@ -2069,11 +2071,15 @@ static int m_can_open(struct net_device *dev) if (err) goto out_phy_power_off; + err = reset_control_deassert(cdev->rst); + if (err) + goto exit_disable_clks; + /* open the can device */ err = open_candev(dev); if (err) { netdev_err(dev, "failed to open can device\n"); - goto exit_disable_clks; + goto out_reset_control_assert; } if (cdev->is_peripheral) @@ -2129,6 +2135,8 @@ static int m_can_open(struct net_device *dev) else napi_disable(&cdev->napi); close_candev(dev); +out_reset_control_assert: + reset_control_assert(cdev->rst); exit_disable_clks: m_can_clk_stop(cdev); out_phy_power_off: @@ -2417,15 +2425,24 @@ int m_can_class_register(struct m_can_classdev *cdev) } } + cdev->rst = devm_reset_control_get_optional_shared(cdev->dev, NULL); + if (IS_ERR(cdev->rst)) + return dev_err_probe(cdev->dev, PTR_ERR(cdev->rst), + "Failed to get reset line\n"); + ret = m_can_clk_start(cdev); if (ret) return ret; + ret = reset_control_deassert(cdev->rst); + if (ret) + goto clk_disable; + if (cdev->is_peripheral) { ret = can_rx_offload_add_manual(cdev->net, &cdev->offload, 
NAPI_POLL_WEIGHT); if (ret) - goto clk_disable; + goto out_reset_control_assert; } if (!cdev->net->irq) { @@ -2454,8 +2471,10 @@ int m_can_class_register(struct m_can_classdev *cdev) KBUILD_MODNAME, cdev->net->irq, cdev->version); /* Probe finished - * Stop clocks. They will be reactivated once the M_CAN device is opened + * Assert reset and stop clocks. + * They will be reactivated once the M_CAN device is opened */ + reset_control_assert(cdev->rst); m_can_clk_stop(cdev); return 0; @@ -2463,6 +2482,8 @@ int m_can_class_register(struct m_can_classdev *cdev) rx_offload_del: if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); +out_reset_control_assert: + reset_control_assert(cdev->rst); clk_disable: m_can_clk_stop(cdev); diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index bd4746c63af3f..7b7600697c6bc 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -86,6 +86,7 @@ struct m_can_classdev { struct device *dev; struct clk *hclk; struct clk *cclk; + struct reset_control *rst; struct workqueue_struct *tx_wq; struct phy *transceiver; From f968a24cad3da72fdff12a0ae5ac0b679439cca1 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Fri, 3 Oct 2025 12:16:38 +0900 Subject: [PATCH 087/867] can: treewide: remove can_change_mtu() can_change_mtu() became obsolete by commit 23049938605b ("can: populate the minimum and maximum MTU values"). Now that net_device->min_mtu and net_device->max_mtu are populated, all the checks are already done by dev_validate_mtu() in net/core/dev.c. Remove the net_device_ops->ndo_change_mtu() callback of all the physical interfaces, then remove can_change_mtu(). Only keep the vcan_change_mtu() and vxcan_change_mtu() because the virtual interfaces use their own different MTU logic. The only functional change this patch introduces is that now the user will be able to change the MTU even if the interface is up. 
This does not matter for Classical CAN and CAN FD because their MTU range is composed of only one value, respectively CAN_MTU and CANFD_MTU. For the upcoming CAN XL, the MTU will be configurable within the CANXL_MIN_MTU to CANXL_MAX_MTU range at any time, even if the interface is up. This is consistent with the other net protocols and does not contradict ISO 11898-1:2024 as having a modifiable MTU is a kernel extension. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20251003-remove-can_change_mtu-v1-1-337f8bc21181@kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 1 - drivers/net/can/bxcan.c | 1 - drivers/net/can/c_can/c_can_main.c | 1 - drivers/net/can/can327.c | 1 - drivers/net/can/cc770/cc770.c | 1 - drivers/net/can/ctucanfd/ctucanfd_base.c | 1 - drivers/net/can/dev/dev.c | 38 ------------------- drivers/net/can/esd/esd_402_pci-core.c | 1 - drivers/net/can/flexcan/flexcan-core.c | 1 - drivers/net/can/grcan.c | 1 - drivers/net/can/ifi_canfd/ifi_canfd.c | 1 - drivers/net/can/janz-ican3.c | 1 - .../can/kvaser_pciefd/kvaser_pciefd_core.c | 1 - drivers/net/can/m_can/m_can.c | 1 - drivers/net/can/mscan/mscan.c | 1 - drivers/net/can/peak_canfd/peak_canfd.c | 1 - drivers/net/can/rcar/rcar_can.c | 1 - drivers/net/can/rcar/rcar_canfd.c | 1 - .../net/can/rockchip/rockchip_canfd-core.c | 1 - drivers/net/can/sja1000/sja1000.c | 1 - drivers/net/can/slcan/slcan-core.c | 1 - drivers/net/can/softing/softing_main.c | 1 - drivers/net/can/spi/hi311x.c | 1 - drivers/net/can/spi/mcp251x.c | 1 - .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 - drivers/net/can/sun4i_can.c | 1 - drivers/net/can/ti_hecc.c | 1 - drivers/net/can/usb/ems_usb.c | 1 - drivers/net/can/usb/esd_usb.c | 1 - drivers/net/can/usb/etas_es58x/es58x_core.c | 1 - drivers/net/can/usb/f81604.c | 1 - drivers/net/can/usb/gs_usb.c | 1 - .../net/can/usb/kvaser_usb/kvaser_usb_core.c | 1 - drivers/net/can/usb/mcba_usb.c | 1 - drivers/net/can/usb/nct6694_canfd.c | 1 - 
drivers/net/can/usb/peak_usb/pcan_usb_core.c | 1 - drivers/net/can/usb/ucan.c | 1 - drivers/net/can/usb/usb_8dev.c | 1 - drivers/net/can/xilinx_can.c | 1 - include/linux/can/dev.h | 1 - 40 files changed, 77 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 191707d7e3dac..c2a3a4eef5b28 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -948,7 +948,6 @@ static const struct net_device_ops at91_netdev_ops = { .ndo_open = at91_open, .ndo_stop = at91_close, .ndo_start_xmit = at91_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops at91_ethtool_ops = { diff --git a/drivers/net/can/bxcan.c b/drivers/net/can/bxcan.c index bfc60eb33dc37..9c3af70498143 100644 --- a/drivers/net/can/bxcan.c +++ b/drivers/net/can/bxcan.c @@ -881,7 +881,6 @@ static const struct net_device_ops bxcan_netdev_ops = { .ndo_open = bxcan_open, .ndo_stop = bxcan_stop, .ndo_start_xmit = bxcan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops bxcan_ethtool_ops = { diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index cc371d0c9f3c7..3702cac7fbf0f 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -1362,7 +1362,6 @@ static const struct net_device_ops c_can_netdev_ops = { .ndo_open = c_can_open, .ndo_stop = c_can_close, .ndo_start_xmit = c_can_start_xmit, - .ndo_change_mtu = can_change_mtu, }; int register_c_can_dev(struct net_device *dev) diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 24af639610309..b66fc16aedd2c 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -849,7 +849,6 @@ static const struct net_device_ops can327_netdev_ops = { .ndo_open = can327_netdev_open, .ndo_stop = can327_netdev_close, .ndo_start_xmit = can327_netdev_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops can327_ethtool_ops = { diff --git 
a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 30909f3aab576..8d5abd643c068 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -834,7 +834,6 @@ static const struct net_device_ops cc770_netdev_ops = { .ndo_open = cc770_open, .ndo_stop = cc770_close, .ndo_start_xmit = cc770_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops cc770_ethtool_ops = { diff --git a/drivers/net/can/ctucanfd/ctucanfd_base.c b/drivers/net/can/ctucanfd/ctucanfd_base.c index 8bd3f0fc385c3..1e6b9e3dc2fea 100644 --- a/drivers/net/can/ctucanfd/ctucanfd_base.c +++ b/drivers/net/can/ctucanfd/ctucanfd_base.c @@ -1301,7 +1301,6 @@ static const struct net_device_ops ctucan_netdev_ops = { .ndo_open = ctucan_open, .ndo_stop = ctucan_close, .ndo_start_xmit = ctucan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops ctucan_ethtool_ops = { diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 15ccedbb3f8dd..0cc3d008adb35 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -359,44 +359,6 @@ void can_set_default_mtu(struct net_device *dev) } } -/* changing MTU and control mode for CAN/CANFD devices */ -int can_change_mtu(struct net_device *dev, int new_mtu) -{ - struct can_priv *priv = netdev_priv(dev); - u32 ctrlmode_static = can_get_static_ctrlmode(priv); - - /* Do not allow changing the MTU while running */ - if (dev->flags & IFF_UP) - return -EBUSY; - - /* allow change of MTU according to the CANFD ability of the device */ - switch (new_mtu) { - case CAN_MTU: - /* 'CANFD-only' controllers can not switch to CAN_MTU */ - if (ctrlmode_static & CAN_CTRLMODE_FD) - return -EINVAL; - - priv->ctrlmode &= ~CAN_CTRLMODE_FD; - break; - - case CANFD_MTU: - /* check for potential CANFD ability */ - if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD) && - !(ctrlmode_static & CAN_CTRLMODE_FD)) - return -EINVAL; - - priv->ctrlmode |= CAN_CTRLMODE_FD; - break; - - 
default: - return -EINVAL; - } - - WRITE_ONCE(dev->mtu, new_mtu); - return 0; -} -EXPORT_SYMBOL_GPL(can_change_mtu); - /* helper to define static CAN controller features at device creation time */ int can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) { diff --git a/drivers/net/can/esd/esd_402_pci-core.c b/drivers/net/can/esd/esd_402_pci-core.c index 5d6d2828cd045..05adecae63757 100644 --- a/drivers/net/can/esd/esd_402_pci-core.c +++ b/drivers/net/can/esd/esd_402_pci-core.c @@ -86,7 +86,6 @@ static const struct net_device_ops pci402_acc_netdev_ops = { .ndo_open = acc_open, .ndo_stop = acc_close, .ndo_start_xmit = acc_start_xmit, - .ndo_change_mtu = can_change_mtu, .ndo_eth_ioctl = can_eth_ioctl_hwts, }; diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index 06d5d35fc1b55..f5d22c61503fa 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -1867,7 +1867,6 @@ static const struct net_device_ops flexcan_netdev_ops = { .ndo_open = flexcan_open, .ndo_stop = flexcan_close, .ndo_start_xmit = flexcan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static int register_flexcandev(struct net_device *dev) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index c5784d9779ef5..3b1b09943436f 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1561,7 +1561,6 @@ static const struct net_device_ops grcan_netdev_ops = { .ndo_open = grcan_open, .ndo_stop = grcan_close, .ndo_start_xmit = grcan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops grcan_ethtool_ops = { diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2eeee65f606f3..0f83335e4d075 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -944,7 +944,6 @@ static const struct net_device_ops ifi_canfd_netdev_ops = { .ndo_open = ifi_canfd_open, .ndo_stop = ifi_canfd_close, 
.ndo_start_xmit = ifi_canfd_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops ifi_canfd_ethtool_ops = { diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index bfa5cbe88017d..1efdd1fd8caa4 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1752,7 +1752,6 @@ static const struct net_device_ops ican3_netdev_ops = { .ndo_open = ican3_open, .ndo_stop = ican3_stop, .ndo_start_xmit = ican3_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops ican3_ethtool_ops = { diff --git a/drivers/net/can/kvaser_pciefd/kvaser_pciefd_core.c b/drivers/net/can/kvaser_pciefd/kvaser_pciefd_core.c index 0880023611beb..705f9bb74cd23 100644 --- a/drivers/net/can/kvaser_pciefd/kvaser_pciefd_core.c +++ b/drivers/net/can/kvaser_pciefd/kvaser_pciefd_core.c @@ -904,7 +904,6 @@ static const struct net_device_ops kvaser_pciefd_netdev_ops = { .ndo_stop = kvaser_pciefd_stop, .ndo_eth_ioctl = can_eth_ioctl_hwts, .ndo_start_xmit = kvaser_pciefd_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static int kvaser_pciefd_set_phys_id(struct net_device *netdev, diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 48b7a67336b5e..873f5991fc5ab 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2148,7 +2148,6 @@ static const struct net_device_ops m_can_netdev_ops = { .ndo_open = m_can_open, .ndo_stop = m_can_close, .ndo_start_xmit = m_can_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static int m_can_get_coalesce(struct net_device *dev, diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 8c2a7bc64d3d7..39c7aa2a0b2f8 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -607,7 +607,6 @@ static const struct net_device_ops mscan_netdev_ops = { .ndo_open = mscan_open, .ndo_stop = mscan_close, .ndo_start_xmit = mscan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct 
ethtool_ops mscan_ethtool_ops = { diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index b5bc80ac7876a..a53c9d347b7b3 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -773,7 +773,6 @@ static const struct net_device_ops peak_canfd_netdev_ops = { .ndo_stop = peak_canfd_close, .ndo_eth_ioctl = peak_eth_ioctl, .ndo_start_xmit = peak_canfd_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static int peak_get_ts_info(struct net_device *dev, diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 5f85f4e272054..fc3df328e877c 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -635,7 +635,6 @@ static const struct net_device_ops rcar_can_netdev_ops = { .ndo_open = rcar_can_open, .ndo_stop = rcar_can_close, .ndo_start_xmit = rcar_can_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops rcar_can_ethtool_ops = { diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 45d36adb51b78..49ab65274b51d 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1818,7 +1818,6 @@ static const struct net_device_ops rcar_canfd_netdev_ops = { .ndo_open = rcar_canfd_open, .ndo_stop = rcar_canfd_close, .ndo_start_xmit = rcar_canfd_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops rcar_canfd_ethtool_ops = { diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 046f0a0ae4d4b..29de0c01e4edc 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -761,7 +761,6 @@ static const struct net_device_ops rkcanfd_netdev_ops = { .ndo_open = rkcanfd_open, .ndo_stop = rkcanfd_stop, .ndo_start_xmit = rkcanfd_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static int __maybe_unused 
rkcanfd_runtime_suspend(struct device *dev) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 4d245857ef1ce..acfa49db3907e 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -697,7 +697,6 @@ static const struct net_device_ops sja1000_netdev_ops = { .ndo_open = sja1000_open, .ndo_stop = sja1000_close, .ndo_start_xmit = sja1000_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops sja1000_ethtool_ops = { diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index 58ff2ec1d9757..cd789e178d34b 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -774,7 +774,6 @@ static const struct net_device_ops slcan_netdev_ops = { .ndo_open = slcan_netdev_open, .ndo_stop = slcan_netdev_close, .ndo_start_xmit = slcan_netdev_xmit, - .ndo_change_mtu = can_change_mtu, }; /****************************************** diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index 278ee8722770c..79bc64395ac46 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -609,7 +609,6 @@ static const struct net_device_ops softing_netdev_ops = { .ndo_open = softing_netdev_open, .ndo_stop = softing_netdev_stop, .ndo_start_xmit = softing_netdev_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops softing_ethtool_ops = { diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 6d4b643e135fd..e00d3dbc4cf43 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -799,7 +799,6 @@ static const struct net_device_ops hi3110_netdev_ops = { .ndo_open = hi3110_open, .ndo_stop = hi3110_stop, .ndo_start_xmit = hi3110_hard_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops hi3110_ethtool_ops = { diff --git a/drivers/net/can/spi/mcp251x.c 
b/drivers/net/can/spi/mcp251x.c index b797e08499d70..1e54e1a22702a 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1270,7 +1270,6 @@ static const struct net_device_ops mcp251x_netdev_ops = { .ndo_open = mcp251x_open, .ndo_stop = mcp251x_stop, .ndo_start_xmit = mcp251x_hard_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops mcp251x_ethtool_ops = { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 7450ea42c1ea5..9402530ba3d48 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1715,7 +1715,6 @@ static const struct net_device_ops mcp251xfd_netdev_ops = { .ndo_stop = mcp251xfd_stop, .ndo_start_xmit = mcp251xfd_start_xmit, .ndo_eth_ioctl = can_eth_ioctl_hwts, - .ndo_change_mtu = can_change_mtu, }; static void diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 53bfd873de9bd..6fcb301ef611d 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -768,7 +768,6 @@ static const struct net_device_ops sun4ican_netdev_ops = { .ndo_open = sun4ican_open, .ndo_stop = sun4ican_close, .ndo_start_xmit = sun4ican_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops sun4ican_ethtool_ops = { diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index e6d6661a908ab..1d3dbf28b1057 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -829,7 +829,6 @@ static const struct net_device_ops ti_hecc_netdev_ops = { .ndo_open = ti_hecc_open, .ndo_stop = ti_hecc_close, .ndo_start_xmit = ti_hecc_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops ti_hecc_ethtool_ops = { diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 5355bac4dccbe..de8e212a1366b 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -885,7 +885,6 
@@ static const struct net_device_ops ems_usb_netdev_ops = { .ndo_open = ems_usb_open, .ndo_stop = ems_usb_close, .ndo_start_xmit = ems_usb_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops ems_usb_ethtool_ops = { diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 9bc1824d7be6a..08da507faef4a 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -1011,7 +1011,6 @@ static const struct net_device_ops esd_usb_netdev_ops = { .ndo_open = esd_usb_open, .ndo_stop = esd_usb_close, .ndo_start_xmit = esd_usb_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops esd_usb_ethtool_ops = { diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index adc91873c083f..47d9e03f30449 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -1977,7 +1977,6 @@ static const struct net_device_ops es58x_netdev_ops = { .ndo_stop = es58x_stop, .ndo_start_xmit = es58x_start_xmit, .ndo_eth_ioctl = can_eth_ioctl_hwts, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops es58x_ethtool_ops = { diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c index e0cfa1460b0b8..efe61ece79ea2 100644 --- a/drivers/net/can/usb/f81604.c +++ b/drivers/net/can/usb/f81604.c @@ -1052,7 +1052,6 @@ static const struct net_device_ops f81604_netdev_ops = { .ndo_open = f81604_open, .ndo_stop = f81604_close, .ndo_start_xmit = f81604_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct can_bittiming_const f81604_bittiming_const = { diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 69b8d6da651bf..30608901a9748 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1101,7 +1101,6 @@ static const struct net_device_ops gs_usb_netdev_ops = { .ndo_open = gs_can_open, .ndo_stop = gs_can_close, 
.ndo_start_xmit = gs_can_start_xmit, - .ndo_change_mtu = can_change_mtu, .ndo_eth_ioctl = gs_can_eth_ioctl, }; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 90e77fa0ff4a5..89e22b66f9192 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -786,7 +786,6 @@ static const struct net_device_ops kvaser_usb_netdev_ops = { .ndo_stop = kvaser_usb_close, .ndo_eth_ioctl = can_eth_ioctl_hwts, .ndo_start_xmit = kvaser_usb_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops kvaser_usb_ethtool_ops = { diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 1f9b915094e64..41c0a1c399bf3 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -761,7 +761,6 @@ static const struct net_device_ops mcba_netdev_ops = { .ndo_open = mcba_usb_open, .ndo_stop = mcba_usb_close, .ndo_start_xmit = mcba_usb_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops mcba_ethtool_ops = { diff --git a/drivers/net/can/usb/nct6694_canfd.c b/drivers/net/can/usb/nct6694_canfd.c index 8deff16491a1a..dd6df2ec3742e 100644 --- a/drivers/net/can/usb/nct6694_canfd.c +++ b/drivers/net/can/usb/nct6694_canfd.c @@ -690,7 +690,6 @@ static const struct net_device_ops nct6694_canfd_netdev_ops = { .ndo_open = nct6694_canfd_open, .ndo_stop = nct6694_canfd_close, .ndo_start_xmit = nct6694_canfd_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops nct6694_canfd_ethtool_ops = { diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index c74302ca7cee9..94b1d7f15d27d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -814,7 +814,6 @@ static const struct net_device_ops peak_usb_netdev_ops = { .ndo_stop = peak_usb_ndo_stop, .ndo_eth_ioctl = 
peak_eth_ioctl, .ndo_start_xmit = peak_usb_ndo_start_xmit, - .ndo_change_mtu = can_change_mtu, }; /* CAN-USB devices generally handle 32-bit CAN channel IDs. diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 07406daf7c88e..de61d9da99e35 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1233,7 +1233,6 @@ static const struct net_device_ops ucan_netdev_ops = { .ndo_open = ucan_open, .ndo_stop = ucan_close, .ndo_start_xmit = ucan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops ucan_ethtool_ops = { diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 8a5596ce4e463..7449328f7cd72 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -868,7 +868,6 @@ static const struct net_device_ops usb_8dev_netdev_ops = { .ndo_open = usb_8dev_open, .ndo_stop = usb_8dev_close, .ndo_start_xmit = usb_8dev_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops usb_8dev_ethtool_ops = { diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index a25a3ca62c12e..43d7f22820b88 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1702,7 +1702,6 @@ static const struct net_device_ops xcan_netdev_ops = { .ndo_open = xcan_open, .ndo_stop = xcan_close, .ndo_start_xmit = xcan_start_xmit, - .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops xcan_ethtool_ops = { diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index a2229a61ccde8..0fe8f80f223e2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -127,7 +127,6 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); void can_set_default_mtu(struct net_device *dev); -int can_change_mtu(struct net_device *dev, int new_mtu); int __must_check can_set_static_ctrlmode(struct net_device *dev, u32 
static_mode); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); From 73cc2882b644e5302237cc6ee1f8885cfb387245 Mon Sep 17 00:00:00 2001 From: "Markus Schneider-Pargmann (TI.com)" Date: Wed, 1 Oct 2025 16:30:19 +0200 Subject: [PATCH 088/867] dt-bindings: can: m_can: Add wakeup properties The pins associated with m_can have to have a special configuration to be able to wake up the SoC from some system states. This configuration is described in the wakeup pinctrl state while the default state describes the default configuration. Also add the sleep state which is already in use by some devicetrees. Also m_can can be a wakeup-source if capable of wakeup. Signed-off-by: Markus Schneider-Pargmann (TI.com) Reviewed-by: Dhruva Gole Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251001-topic-mcan-wakeup-source-v6-12-v10-1-4ab508ac5d1e@baylibre.com Signed-off-by: Marc Kleine-Budde --- .../bindings/net/can/bosch,m_can.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index 61ef60d8f1c78..2c9d37975bedd 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -109,6 +109,26 @@ properties: maximum: 32 minItems: 1 + pinctrl-0: + description: Default pinctrl state + + pinctrl-1: + description: Can be "sleep" or "wakeup" pinctrl state + + pinctrl-2: + description: Can be "sleep" or "wakeup" pinctrl state + + pinctrl-names: + description: + When present should contain at least "default" describing the default pin + states. Other states are "sleep" which describes the pinstate when + sleeping and "wakeup" describing the pins if wakeup is enabled. 
+ minItems: 1 + items: + - const: default + - enum: [ sleep, wakeup ] + - const: wakeup + power-domains: description: Power domain provider node and an args specifier containing @@ -125,6 +145,11 @@ properties: minItems: 1 maxItems: 2 + wakeup-source: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + List of phandles to system idle states in which mcan can wakeup the system. + required: - compatible - reg From 04d5826b074e09c69895fc247ce13925bd7a7284 Mon Sep 17 00:00:00 2001 From: "Markus Schneider-Pargmann (TI.com)" Date: Wed, 1 Oct 2025 16:30:20 +0200 Subject: [PATCH 089/867] can: m_can: Map WoL to device_set_wakeup_enable In some devices the pins of the m_can module can act as a wakeup source. This patch helps do that by connecting the WAKE_PHY WoL option to device_set_wakeup_enable. By marking this device as being wakeup enabled, this setting can be used by platform code to decide which sleep or poweroff mode to use. Also this prepares the driver for the next patch in which the pinctrl settings are changed depending on the desired wakeup source. Reviewed-by: Vincent Mailhol Reviewed-by: Kendall Willis Signed-off-by: Markus Schneider-Pargmann (TI.com) Link: https://patch.msgid.link/20251001-topic-mcan-wakeup-source-v6-12-v10-2-4ab508ac5d1e@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 873f5991fc5ab..f00bdec3246a1 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2238,6 +2238,36 @@ static int m_can_set_coalesce(struct net_device *dev, return 0; } +static void m_can_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct m_can_classdev *cdev = netdev_priv(dev); + + wol->supported = device_can_wakeup(cdev->dev) ? WAKE_PHY : 0; + wol->wolopts = device_may_wakeup(cdev->dev) ? 
WAKE_PHY : 0; +} + +static int m_can_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct m_can_classdev *cdev = netdev_priv(dev); + bool wol_enable = !!(wol->wolopts & WAKE_PHY); + int ret; + + if (wol->wolopts & ~WAKE_PHY) + return -EINVAL; + + if (wol_enable == device_may_wakeup(cdev->dev)) + return 0; + + ret = device_set_wakeup_enable(cdev->dev, wol_enable); + if (ret) { + netdev_err(cdev->net, "Failed to set wakeup enable %pE\n", + ERR_PTR(ret)); + return ret; + } + + return 0; +} + static const struct ethtool_ops m_can_ethtool_ops_coalescing = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS_IRQ | ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | @@ -2247,10 +2277,14 @@ static const struct ethtool_ops m_can_ethtool_ops_coalescing = { .get_ts_info = ethtool_op_get_ts_info, .get_coalesce = m_can_get_coalesce, .set_coalesce = m_can_set_coalesce, + .get_wol = m_can_get_wol, + .set_wol = m_can_set_wol, }; static const struct ethtool_ops m_can_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, + .get_wol = m_can_get_wol, + .set_wol = m_can_set_wol, }; static int register_m_can_dev(struct m_can_classdev *cdev) @@ -2377,6 +2411,9 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, goto out; } + if (dev->of_node && of_property_read_bool(dev->of_node, "wakeup-source")) + device_set_wakeup_capable(dev, true); + /* Get TX FIFO size * Defines the total amount of echo buffers for loopback */ From 148e125d4e6fa8ff03f91d9070ad87f7b4f5c610 Mon Sep 17 00:00:00 2001 From: "Markus Schneider-Pargmann (TI.com)" Date: Wed, 1 Oct 2025 16:30:21 +0200 Subject: [PATCH 090/867] can: m_can: Return ERR_PTR on error in allocation We have more detailed error values available, return them in the core driver and the calling drivers to return proper errors to callers. 
Reviewed-by: Vincent Mailhol Reviewed-by: Dhruva Gole Reviewed-by: Kendall Willis Signed-off-by: Markus Schneider-Pargmann (TI.com) Link: https://patch.msgid.link/20251001-topic-mcan-wakeup-source-v6-12-v10-3-4ab508ac5d1e@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 6 +++--- drivers/net/can/m_can/m_can_pci.c | 4 ++-- drivers/net/can/m_can/m_can_platform.c | 4 ++-- drivers/net/can/m_can/tcan4x5x-core.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index f00bdec3246a1..10b5862b48801 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2408,7 +2408,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, sizeof(mram_config_vals) / 4); if (ret) { dev_err(dev, "Could not get Message RAM configuration."); - goto out; + return ERR_PTR(ret); } if (dev->of_node && of_property_read_bool(dev->of_node, "wakeup-source")) @@ -2423,7 +2423,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, net_dev = alloc_candev(sizeof_priv, tx_fifo_size); if (!net_dev) { dev_err(dev, "Failed to allocate CAN device"); - goto out; + return ERR_PTR(-ENOMEM); } class_dev = netdev_priv(net_dev); @@ -2433,7 +2433,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, m_can_of_parse_mram(class_dev, mram_config_vals); spin_lock_init(&class_dev->tx_handling_spinlock); -out: + return class_dev; } EXPORT_SYMBOL_GPL(m_can_class_allocate_dev); diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 9ad7419f88f83..eb31ed1f96449 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -111,8 +111,8 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) mcan_class = m_can_class_allocate_dev(&pci->dev, sizeof(struct m_can_pci_priv)); - if (!mcan_class) - return -ENOMEM; + if (IS_ERR(mcan_class)) + return 
PTR_ERR(mcan_class); priv = cdev_to_priv(mcan_class); diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index 4a412add2b8d3..56da411878af0 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -87,8 +87,8 @@ static int m_can_plat_probe(struct platform_device *pdev) mcan_class = m_can_class_allocate_dev(&pdev->dev, sizeof(struct m_can_plat_priv)); - if (!mcan_class) - return -ENOMEM; + if (IS_ERR(mcan_class)) + return PTR_ERR(mcan_class); priv = cdev_to_priv(mcan_class); diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 39b0b5277b11f..31cc9d0abd453 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -416,8 +416,8 @@ static int tcan4x5x_can_probe(struct spi_device *spi) mcan_class = m_can_class_allocate_dev(&spi->dev, sizeof(struct tcan4x5x_priv)); - if (!mcan_class) - return -ENOMEM; + if (IS_ERR(mcan_class)) + return PTR_ERR(mcan_class); ret = m_can_check_mram_cfg(mcan_class, TCAN4X5X_MRAM_SIZE); if (ret) From a77a29775373a91ad1ca0375e0ab1b1b294d53bb Mon Sep 17 00:00:00 2001 From: "Markus Schneider-Pargmann (TI.com)" Date: Wed, 1 Oct 2025 16:30:22 +0200 Subject: [PATCH 091/867] can: m_can: Support pinctrl wakeup state TI AM62x SoC requires a wakeup flag being set in pinctrl when mcan pins act as a wakeup source. Add support to select the wakeup state if WOL is enabled. 
Signed-off-by: Markus Schneider-Pargmann (TI.com) Link: https://patch.msgid.link/20251001-topic-mcan-wakeup-source-v6-12-v10-4-4ab508ac5d1e@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 69 ++++++++++++++++++++++++++++++++++- drivers/net/can/m_can/m_can.h | 3 ++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 10b5862b48801..8569596ae830a 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2265,7 +2265,26 @@ static int m_can_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) return ret; } + if (!IS_ERR_OR_NULL(cdev->pinctrl_state_wakeup)) { + if (wol_enable) + ret = pinctrl_select_state(cdev->pinctrl, cdev->pinctrl_state_wakeup); + else + ret = pinctrl_pm_select_default_state(cdev->dev); + + if (ret) { + netdev_err(cdev->net, "Failed to select pinctrl state %pE\n", + ERR_PTR(ret)); + goto err_wakeup_enable; + } + } + return 0; + +err_wakeup_enable: + /* Revert wakeup enable */ + device_set_wakeup_enable(cdev->dev, !wol_enable); + + return ret; } static const struct ethtool_ops m_can_ethtool_ops_coalescing = { @@ -2393,6 +2412,42 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) } EXPORT_SYMBOL_GPL(m_can_class_get_clocks); +static bool m_can_class_wakeup_pinctrl_enabled(struct m_can_classdev *class_dev) +{ + return device_may_wakeup(class_dev->dev) && class_dev->pinctrl_state_wakeup; +} + +static int m_can_class_parse_pinctrl(struct m_can_classdev *class_dev) +{ + struct device *dev = class_dev->dev; + int ret; + + class_dev->pinctrl = devm_pinctrl_get(dev); + if (IS_ERR(class_dev->pinctrl)) { + ret = PTR_ERR(class_dev->pinctrl); + class_dev->pinctrl = NULL; + + if (ret == -ENODEV) + return 0; + + return dev_err_probe(dev, ret, "Failed to get pinctrl\n"); + } + + class_dev->pinctrl_state_wakeup = + pinctrl_lookup_state(class_dev->pinctrl, "wakeup"); + if (IS_ERR(class_dev->pinctrl_state_wakeup)) { + 
ret = PTR_ERR(class_dev->pinctrl_state_wakeup); + class_dev->pinctrl_state_wakeup = NULL; + + if (ret == -ENODEV) + return 0; + + return dev_err_probe(dev, ret, "Failed to lookup pinctrl wakeup state\n"); + } + + return 0; +} + struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, int sizeof_priv) { @@ -2434,7 +2489,15 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, m_can_of_parse_mram(class_dev, mram_config_vals); spin_lock_init(&class_dev->tx_handling_spinlock); + ret = m_can_class_parse_pinctrl(class_dev); + if (ret) + goto err_free_candev; + return class_dev; + +err_free_candev: + free_candev(net_dev); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(m_can_class_allocate_dev); @@ -2563,7 +2626,8 @@ int m_can_class_suspend(struct device *dev) cdev->can.state = CAN_STATE_SLEEPING; } - pinctrl_pm_select_sleep_state(dev); + if (!m_can_class_wakeup_pinctrl_enabled(cdev)) + pinctrl_pm_select_sleep_state(dev); return ret; } @@ -2575,7 +2639,8 @@ int m_can_class_resume(struct device *dev) struct net_device *ndev = cdev->net; int ret = 0; - pinctrl_pm_select_default_state(dev); + if (!m_can_class_wakeup_pinctrl_enabled(cdev)) + pinctrl_pm_select_default_state(dev); if (netif_running(ndev)) { ret = m_can_clk_start(cdev); diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index 7b7600697c6bc..f2f89687bbd21 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -129,6 +129,9 @@ struct m_can_classdev { struct mram_cfg mcfg[MRAM_CFG_NUM]; struct hrtimer hrtimer; + + struct pinctrl *pinctrl; + struct pinctrl_state *pinctrl_state_wakeup; }; struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, int sizeof_priv); From c6dcc2b321cccf1fc2a18480bb547956fdb2b2e0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 8 Aug 2025 15:29:34 +0200 Subject: [PATCH 092/867] can: m_can: m_can_init_ram(): make static Since commit eaacfeaca7ad ("can: m_can: Call the RAM init directly from 
m_can_chip_config") m_can_init_ram() is not used outside of m_can.c. Mark as static and remove the EXPORT_SYMBOL_GPL(). Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-1-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 43 +++++++++++++++++------------------ drivers/net/can/m_can/m_can.h | 1 - 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 8569596ae830a..9f4002f3481e5 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1380,6 +1380,27 @@ static const struct can_bittiming_const m_can_data_bittiming_const_31X = { .brp_inc = 1, }; +static int m_can_init_ram(struct m_can_classdev *cdev) +{ + int end, i, start; + int err = 0; + + /* initialize the entire Message RAM in use to avoid possible + * ECC/parity checksum errors when reading an uninitialized buffer + */ + start = cdev->mcfg[MRAM_SIDF].off; + end = cdev->mcfg[MRAM_TXB].off + + cdev->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE; + + for (i = start; i < end; i += 4) { + err = m_can_fifo_write_no_off(cdev, i, 0x0); + if (err) + break; + } + + return err; +} + static int m_can_set_bittiming(struct net_device *dev) { struct m_can_classdev *cdev = netdev_priv(dev); @@ -2374,28 +2395,6 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev, cdev->mcfg[MRAM_TXB].off, cdev->mcfg[MRAM_TXB].num); } -int m_can_init_ram(struct m_can_classdev *cdev) -{ - int end, i, start; - int err = 0; - - /* initialize the entire Message RAM in use to avoid possible - * ECC/parity checksum errors when reading an uninitialized buffer - */ - start = cdev->mcfg[MRAM_SIDF].off; - end = cdev->mcfg[MRAM_TXB].off + - cdev->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE; - - for (i = start; i < end; i += 4) { - err = m_can_fifo_write_no_off(cdev, i, 0x0); - if (err) - break; - } - - return err; -} -EXPORT_SYMBOL_GPL(m_can_init_ram); - int m_can_class_get_clocks(struct m_can_classdev 
*cdev) { int ret = 0; diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index f2f89687bbd21..4743342b2fba3 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -139,7 +139,6 @@ void m_can_class_free_dev(struct net_device *net); int m_can_class_register(struct m_can_classdev *cdev); void m_can_class_unregister(struct m_can_classdev *cdev); int m_can_class_get_clocks(struct m_can_classdev *cdev); -int m_can_init_ram(struct m_can_classdev *priv); int m_can_check_mram_cfg(struct m_can_classdev *cdev, u32 mram_max_size); int m_can_class_suspend(struct device *dev); From 60af9dbb63fb077cc71496cf5ada82261be0bb25 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 7 Aug 2025 20:14:55 +0200 Subject: [PATCH 093/867] can: m_can: hrtimer_callback(): rename to m_can_polling_timer() The original use of struct m_can_classdev::hrtimer was to support polling for devices without IRQ, with the timer function called hrtimer_callback(). Commit 07f25091ca02 ("can: m_can: Implement receive coalescing") uses the hrtimer for software-supported IRQ coalescence, with the timer function called m_can_coalescing_timer(). To improve the readability of the driver, rename hrtimer_callback() to m_can_polling_timer(), which better describes the functionality. 
Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-2-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 9f4002f3481e5..110cfd54b6695 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2058,7 +2058,7 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, return ret; } -static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer) +static enum hrtimer_restart m_can_polling_timer(struct hrtimer *timer) { struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer); @@ -2545,7 +2545,7 @@ int m_can_class_register(struct m_can_classdev *cdev) if (!cdev->net->irq) { dev_dbg(cdev->dev, "Polling enabled, initialize hrtimer"); - hrtimer_setup(&cdev->hrtimer, &hrtimer_callback, CLOCK_MONOTONIC, + hrtimer_setup(&cdev->hrtimer, m_can_polling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); } else { hrtimer_setup(&cdev->hrtimer, m_can_coalescing_timer, CLOCK_MONOTONIC, From 293735053eaa7a15e0debacdcbe3cdcd006436be Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 8 Aug 2025 14:14:55 +0200 Subject: [PATCH 094/867] net: m_can: convert dev_{dbg,info,err} -> netdev_{dbg,info,err} To ease debugging use the netdev_{dbg,info,err}() functions instead of dev_{dbg,info,err}. 
Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-3-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 54 +++++++++++++++++------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 110cfd54b6695..6aef5e771fc32 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -387,8 +387,8 @@ static int m_can_cccr_update_bits(struct m_can_classdev *cdev, u32 mask, u32 val size_t tries = 10; if (!(mask & CCCR_INIT) && !(val_before & CCCR_INIT)) { - dev_err(cdev->dev, - "refusing to configure device when in normal mode\n"); + netdev_err(cdev->net, + "refusing to configure device when in normal mode\n"); return -EBUSY; } @@ -470,7 +470,7 @@ static void m_can_coalescing_disable(struct m_can_classdev *cdev) static inline void m_can_enable_all_interrupts(struct m_can_classdev *cdev) { if (!cdev->net->irq) { - dev_dbg(cdev->dev, "Start hrtimer\n"); + netdev_dbg(cdev->net, "Start hrtimer\n"); hrtimer_start(&cdev->hrtimer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS), HRTIMER_MODE_REL_PINNED); @@ -486,7 +486,7 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) m_can_write(cdev, M_CAN_ILE, 0x0); if (!cdev->net->irq) { - dev_dbg(cdev->dev, "Stop hrtimer\n"); + netdev_dbg(cdev->net, "Stop hrtimer\n"); hrtimer_try_to_cancel(&cdev->hrtimer); } } @@ -1486,7 +1486,7 @@ static int m_can_chip_config(struct net_device *dev) err = m_can_init_ram(cdev); if (err) { - dev_err(cdev->dev, "Message RAM configuration failed\n"); + netdev_err(dev, "Message RAM configuration failed\n"); return err; } @@ -1716,7 +1716,7 @@ static int m_can_niso_supported(struct m_can_classdev *cdev) /* Then clear the it again. 
*/ ret = m_can_cccr_update_bits(cdev, CCCR_NISO, 0); if (ret) { - dev_err(cdev->dev, "failed to revert the NON-ISO bit in CCCR\n"); + netdev_err(cdev->net, "failed to revert the NON-ISO bit in CCCR\n"); return ret; } @@ -1735,8 +1735,8 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) m_can_version = m_can_check_core_release(cdev); /* return if unsupported version */ if (!m_can_version) { - dev_err(cdev->dev, "Unsupported version number: %2d", - m_can_version); + netdev_err(cdev->net, "Unsupported version number: %2d", + m_can_version); return -EINVAL; } @@ -1794,8 +1794,8 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) cdev->can.ctrlmode_supported |= CAN_CTRLMODE_FD_NON_ISO; break; default: - dev_err(cdev->dev, "Unsupported version number: %2d", - cdev->version); + netdev_err(cdev->net, "Unsupported version number: %2d", + cdev->version); return -EINVAL; } @@ -2348,8 +2348,8 @@ int m_can_check_mram_cfg(struct m_can_classdev *cdev, u32 mram_max_size) total_size = cdev->mcfg[MRAM_TXB].off - cdev->mcfg[MRAM_SIDF].off + cdev->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE; if (total_size > mram_max_size) { - dev_err(cdev->dev, "Total size of mram config(%u) exceeds mram(%u)\n", - total_size, mram_max_size); + netdev_err(cdev->net, "Total size of mram config(%u) exceeds mram(%u)\n", + total_size, mram_max_size); return -EINVAL; } @@ -2384,15 +2384,15 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev, cdev->mcfg[MRAM_TXB].num = mram_config_vals[7] & FIELD_MAX(TXBC_NDTB_MASK); - dev_dbg(cdev->dev, - "sidf 0x%x %d xidf 0x%x %d rxf0 0x%x %d rxf1 0x%x %d rxb 0x%x %d txe 0x%x %d txb 0x%x %d\n", - cdev->mcfg[MRAM_SIDF].off, cdev->mcfg[MRAM_SIDF].num, - cdev->mcfg[MRAM_XIDF].off, cdev->mcfg[MRAM_XIDF].num, - cdev->mcfg[MRAM_RXF0].off, cdev->mcfg[MRAM_RXF0].num, - cdev->mcfg[MRAM_RXF1].off, cdev->mcfg[MRAM_RXF1].num, - cdev->mcfg[MRAM_RXB].off, cdev->mcfg[MRAM_RXB].num, - cdev->mcfg[MRAM_TXE].off, cdev->mcfg[MRAM_TXE].num, - cdev->mcfg[MRAM_TXB].off, 
cdev->mcfg[MRAM_TXB].num); + netdev_dbg(cdev->net, + "sidf 0x%x %d xidf 0x%x %d rxf0 0x%x %d rxf1 0x%x %d rxb 0x%x %d txe 0x%x %d txb 0x%x %d\n", + cdev->mcfg[MRAM_SIDF].off, cdev->mcfg[MRAM_SIDF].num, + cdev->mcfg[MRAM_XIDF].off, cdev->mcfg[MRAM_XIDF].num, + cdev->mcfg[MRAM_RXF0].off, cdev->mcfg[MRAM_RXF0].num, + cdev->mcfg[MRAM_RXF1].off, cdev->mcfg[MRAM_RXF1].num, + cdev->mcfg[MRAM_RXB].off, cdev->mcfg[MRAM_RXB].num, + cdev->mcfg[MRAM_TXE].off, cdev->mcfg[MRAM_TXE].num, + cdev->mcfg[MRAM_TXB].off, cdev->mcfg[MRAM_TXB].num); } int m_can_class_get_clocks(struct m_can_classdev *cdev) @@ -2403,7 +2403,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) cdev->cclk = devm_clk_get(cdev->dev, "cclk"); if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { - dev_err(cdev->dev, "no clock found\n"); + netdev_err(cdev->net, "no clock found\n"); ret = -ENODEV; } @@ -2544,7 +2544,7 @@ int m_can_class_register(struct m_can_classdev *cdev) } if (!cdev->net->irq) { - dev_dbg(cdev->dev, "Polling enabled, initialize hrtimer"); + netdev_dbg(cdev->net, "Polling enabled, initialize hrtimer"); hrtimer_setup(&cdev->hrtimer, m_can_polling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); } else { @@ -2558,15 +2558,15 @@ int m_can_class_register(struct m_can_classdev *cdev) ret = register_m_can_dev(cdev); if (ret) { - dev_err(cdev->dev, "registering %s failed (err=%d)\n", - cdev->net->name, ret); + netdev_err(cdev->net, "registering %s failed (err=%d)\n", + cdev->net->name, ret); goto rx_offload_del; } of_can_transceiver(cdev->net); - dev_info(cdev->dev, "%s device registered (irq=%d, version=%d)\n", - KBUILD_MODNAME, cdev->net->irq, cdev->version); + netdev_info(cdev->net, "device registered (irq=%d, version=%d)\n", + cdev->net->irq, cdev->version); /* Probe finished * Assert reset and stop clocks. 
From c6cbd24f65f1bbdae15cf8450a8298255c231e2e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 7 Aug 2025 16:27:45 +0200 Subject: [PATCH 095/867] can: m_can: m_can_interrupt_enable(): use m_can_write() instead of open coding it As everywhere else in the driver, use m_can_write() instead of open coding it. Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-4-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 6aef5e771fc32..98e7ab612bba4 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -452,7 +452,7 @@ static void m_can_interrupt_enable(struct m_can_classdev *cdev, u32 interrupts) { if (cdev->active_interrupts == interrupts) return; - cdev->ops->write_reg(cdev, M_CAN_IE, interrupts); + m_can_write(cdev, M_CAN_IE, interrupts); cdev->active_interrupts = interrupts; } From 6218391758b53e6cadf2aa80093457eeb485f06e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 7 Aug 2025 17:19:31 +0200 Subject: [PATCH 096/867] can: m_can: m_can_class_register(): remove error message in case devm_kzalloc() fails If devm_kzalloc() fails, it already outputs an error message. Remove the error message from m_can_class_register() accordingly. 
Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-5-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 98e7ab612bba4..8013e88350270 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2517,10 +2517,8 @@ int m_can_class_register(struct m_can_classdev *cdev) devm_kzalloc(cdev->dev, cdev->tx_fifo_size * sizeof(*cdev->tx_ops), GFP_KERNEL); - if (!cdev->tx_ops) { - dev_err(cdev->dev, "Failed to allocate tx_ops for workqueue\n"); + if (!cdev->tx_ops) return -ENOMEM; - } } cdev->rst = devm_reset_control_get_optional_shared(cdev->dev, NULL); From b24b43522eb33c32ca2348354e2555685c8c4644 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 7 Aug 2025 17:20:08 +0200 Subject: [PATCH 097/867] can: m_can: m_can_tx_submit(): remove unneeded sanity checks m_can_tx_submit() is only called for peripheral devices. So remove the sanity check. 
Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-6-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 8013e88350270..713d3df296df5 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1973,11 +1973,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev, static void m_can_tx_submit(struct m_can_classdev *cdev) { - if (cdev->version == 30) - return; - if (!cdev->is_peripheral) - return; - m_can_write(cdev, M_CAN_TXBAR, cdev->tx_peripheral_submit); cdev->tx_peripheral_submit = 0; } From 91a55c72a821d106a588af4c28aec129dcbea5af Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 12:20:44 +0200 Subject: [PATCH 098/867] can: m_can: m_can_get_berr_counter(): don't wake up controller if interface is down If the interface is down, the CAN controller might be powered down, the clock disabled, and/or its external reset asserted. Don't wake up the controller to read the CAN bus error counters, if the interface is down.
Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20251008-m_can-cleanups-v1-7-1784a18eaa84@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 713d3df296df5..eb856547ae7df 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -791,6 +791,10 @@ static int m_can_get_berr_counter(const struct net_device *dev, struct m_can_classdev *cdev = netdev_priv(dev); int err; + /* Avoid waking up the controller if the interface is down */ + if (!(dev->flags & IFF_UP)) + return 0; + err = m_can_clk_start(cdev); if (err) return err; From e5ae07b2ef86ee5bcb90a1c933c9d5ac1a0e33be Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sun, 28 Sep 2025 20:28:19 +0200 Subject: [PATCH 099/867] batman-adv: Start new development cycle This version will contain all the (major or even only minor) changes for Linux 6.19. The version number isn't a semantic version number with major and minor information. It is just encoding the year of the expected publishing as Linux -rc1 and the number of published versions this year (starting at 0). Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2be1ac17acaa4..af230b017bc17 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2025.4" +#define BATADV_SOURCE_VERSION "2025.5" #endif /* B.A.T.M.A.N. 
parameters */ From ed5730f3f733659a4a023a5f1e767365fe341648 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 28 Sep 2025 10:29:32 +0200 Subject: [PATCH 100/867] batman-adv: use skb_crc32c() instead of skb_seq_read() Make batadv_bla_check_duplist() just use the new function skb_crc32c(), instead of calling skb_seq_read() with crc32c(). This is faster and simpler. Suggested-by: Eric Biggers Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/Kconfig | 1 + net/batman-adv/bridge_loop_avoidance.c | 51 ++++---------------------- net/batman-adv/types.h | 2 +- 3 files changed, 10 insertions(+), 44 deletions(-) diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index c299e2bc87eda..58c408b7a7d9c 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -35,6 +35,7 @@ config BATMAN_ADV_BLA bool "Bridge Loop Avoidance" depends on BATMAN_ADV && INET select CRC16 + select NET_CRC32C default y help This option enables BLA (Bridge Loop Avoidance), a mechanism diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index b992ba12aa247..3dc791c15bf72 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -1585,45 +1584,11 @@ int batadv_bla_init(struct batadv_priv *bat_priv) return 0; } -/** - * batadv_skb_crc32() - calculate CRC32 of the whole packet and skip bytes in - * the header - * @skb: skb pointing to fragmented socket buffers - * @payload_ptr: Pointer to position inside the head buffer of the skb - * marking the start of the data to be CRC'ed - * - * payload_ptr must always point to an address in the skb head buffer and not to - * a fragment. 
- * - * Return: big endian crc32c of the checksummed data - */ -static __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) -{ - unsigned int to = skb->len; - unsigned int consumed = 0; - struct skb_seq_state st; - unsigned int from; - unsigned int len; - const u8 *data; - u32 crc = 0; - - from = (unsigned int)(payload_ptr - skb->data); - - skb_prepare_seq_read(skb, from, to, &st); - while ((len = skb_seq_read(consumed, &data, &st)) != 0) { - crc = crc32c(crc, data, len); - consumed += len; - } - - return htonl(crc); -} - /** * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup. * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the multicast packet to be checked - * @payload_ptr: pointer to position inside the head buffer of the skb - * marking the start of the data to be CRC'ed + * @payload_offset: offset in the skb, marking the start of the data to be CRC'ed * @orig: originator mac address, NULL if unknown * * Check if it is on our broadcast list. Another gateway might have sent the @@ -1638,16 +1603,18 @@ static __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) * Return: true if a packet is in the duplicate list, false otherwise. */ static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv, - struct sk_buff *skb, u8 *payload_ptr, + struct sk_buff *skb, int payload_offset, const u8 *orig) { struct batadv_bcast_duplist_entry *entry; bool ret = false; + int payload_len; int i, curr; - __be32 crc; + u32 crc; /* calculate the crc ... 
*/ - crc = batadv_skb_crc32(skb, payload_ptr); + payload_len = skb->len - payload_offset; + crc = skb_crc32c(skb, payload_offset, payload_len, 0); spin_lock_bh(&bat_priv->bla.bcast_duplist_lock); @@ -1727,7 +1694,7 @@ static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv, static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv, struct sk_buff *skb) { - return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL); + return batadv_bla_check_duplist(bat_priv, skb, 0, NULL); } /** @@ -1745,12 +1712,10 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_bcast_packet *bcast_packet; - u8 *payload_ptr; bcast_packet = (struct batadv_bcast_packet *)skb->data; - payload_ptr = (u8 *)(bcast_packet + 1); - return batadv_bla_check_duplist(bat_priv, skb, payload_ptr, + return batadv_bla_check_duplist(bat_priv, skb, sizeof(*bcast_packet), bcast_packet->orig); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ae1d7a8dc480f..8fc5fe0e9b053 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -734,7 +734,7 @@ struct batadv_bcast_duplist_entry { u8 orig[ETH_ALEN]; /** @crc: crc32 checksum of broadcast payload */ - __be32 crc; + u32 crc; /** @entrytime: time when the broadcast packet was received */ unsigned long entrytime; From 0746da01767e8a0df97ae5d031d852e932e03682 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Tue, 14 Oct 2025 21:40:18 +0800 Subject: [PATCH 101/867] net: hibmcge: support pci_driver.shutdown() support pci_driver.shutdown() for hibmcge driver. 
Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251014134018.1178385-1-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 0b92a2e5e9869..068da2fd1fea8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -472,6 +472,22 @@ static int hbg_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void hbg_shutdown(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + + rtnl_lock(); + if (netif_running(netdev)) + dev_close(netdev); + rtnl_unlock(); + + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + + if (system_state == SYSTEM_POWER_OFF) + pci_set_power_state(pdev, PCI_D3hot); +} + static const struct pci_device_id hbg_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, 0x3730), 0}, { } @@ -482,6 +498,7 @@ static struct pci_driver hbg_driver = { .name = "hibmcge", .id_table = hbg_pci_tbl, .probe = hbg_probe, + .shutdown = hbg_shutdown, }; static int __init hbg_module_init(void) From 1c17f4373d4db1e1f0ebd3ddcd8e7a642927a826 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 22:42:07 +0000 Subject: [PATCH 102/867] ipv6: Move ipv6_fl_list from ipv6_pinfo to inet_sock. In {tcp6,udp6,raw6}_sock, struct ipv6_pinfo is always placed at the beginning of a new cache line because 1. __alignof__(struct tcp_sock) is 64 due to ____cacheline_aligned of __cacheline_group_begin(tcp_sock_write_tx) 2. __alignof__(struct udp_sock) is 64 due to ____cacheline_aligned of struct numa_drop_counters 3. in raw6_sock, struct numa_drop_counters is placed before struct ipv6_pinfo . struct ipv6_pinfo is 136 bytes, but the last cache line is only used by ipv6_fl_list: $ pahole -C ipv6_pinfo vmlinux struct ipv6_pinfo { ... 
/* --- cacheline 2 boundary (128 bytes) --- */ struct ipv6_fl_socklist * ipv6_fl_list; /* 128 8 */ /* size: 136, cachelines: 3, members: 23 */ Let's move ipv6_fl_list from struct ipv6_pinfo to struct inet_sock to save a full cache line for {tcp6,udp6,raw6}_sock. Now, struct ipv6_pinfo is 128 bytes, and {tcp6,udp6,raw6}_sock have 64 bytes less, while {tcp,udp,raw}_sock retain the same size. Before: # grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}' RAWv6 1408 UDPv6 1472 TCPv6 2560 RAW 1152 UDP 1280 TCP 2368 After: # grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}' RAWv6 1344 UDPv6 1408 TCPv6 2496 RAW 1152 UDP 1280 TCP 2368 Also, ipv6_fl_list and inet_flags (SNDFLOW bit) are placed in the same cache line. $ pahole -C inet_sock vmlinux ... /* --- cacheline 11 boundary (704 bytes) was 56 bytes ago --- */ struct ipv6_pinfo * pinet6; /* 760 8 */ /* --- cacheline 12 boundary (768 bytes) --- */ struct ipv6_fl_socklist * ipv6_fl_list; /* 768 8 */ unsigned long inet_flags; /* 776 8 */ Doc churn is due to the insufficient Type column (only 1 space short). 
Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251014224210.2964778-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/inet_sock.rst | 79 ++++++++++--------- .../chelsio/inline_crypto/chtls/chtls_cm.c | 4 +- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/ipv6/ip6_flowlabel.c | 44 +++++------ net/ipv6/tcp_ipv6.c | 13 +-- net/sctp/ipv6.c | 8 +- 7 files changed, 76 insertions(+), 74 deletions(-) diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst index b11bf48fa2b36..4c72a28a7012e 100644 --- a/Documentation/networking/net_cachelines/inet_sock.rst +++ b/Documentation/networking/net_cachelines/inet_sock.rst @@ -5,42 +5,43 @@ inet_sock struct fast path usage breakdown ========================================== -======================= ===================== =================== =================== ====================================================================================================== -Type Name fastpath_tx_access fastpath_rx_access comment -======================= ===================== =================== =================== ====================================================================================================== -struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data -struct ipv6_pinfo* pinet6 -be16 inet_sport read_mostly __tcp_transmit_skb -be32 inet_daddr read_mostly ip_select_ident_segs -be32 inet_rcv_saddr -be16 inet_dport read_mostly __tcp_transmit_skb -u16 inet_num -be32 inet_saddr -s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl -u16 cmsg_flags -struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit -u16 inet_id read_mostly ip_select_ident_segs -u8 tos read_mostly ip_queue_xmit -u8 min_ttl -u8 mc_ttl -u8 pmtudisc -u8:1 recverr -u8:1 is_icsk -u8:1 freebind -u8:1 hdrincl -u8:1 
mc_loop -u8:1 transparent -u8:1 mc_all -u8:1 nodefrag -u8:1 bind_address_no_port -u8:1 recverr_rfc4884 -u8:1 defer_connect read_mostly tcp_sendmsg_fastopen -u8 rcv_tos -u8 convert_csum -int uc_index -int mc_index -be32 mc_addr -struct ip_mc_socklist* mc_list -struct inet_cork_full cork read_mostly __tcp_transmit_skb -struct local_port_range -======================= ===================== =================== =================== ====================================================================================================== +======================== ===================== =================== =================== ====================================================================================================== +Type Name fastpath_tx_access fastpath_rx_access comment +======================== ===================== =================== =================== ====================================================================================================== +struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data +struct ipv6_pinfo* pinet6 +struct ipv6_fl_socklist* ipv6_fl_list read_mostly tcp_v6_connect,__ip6_datagram_connect,udpv6_sendmsg,rawv6_sendmsg +be16 inet_sport read_mostly __tcp_transmit_skb +be32 inet_daddr read_mostly ip_select_ident_segs +be32 inet_rcv_saddr +be16 inet_dport read_mostly __tcp_transmit_skb +u16 inet_num +be32 inet_saddr +s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl +u16 cmsg_flags +struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit +u16 inet_id read_mostly ip_select_ident_segs +u8 tos read_mostly ip_queue_xmit +u8 min_ttl +u8 mc_ttl +u8 pmtudisc +u8:1 recverr +u8:1 is_icsk +u8:1 freebind +u8:1 hdrincl +u8:1 mc_loop +u8:1 transparent +u8:1 mc_all +u8:1 nodefrag +u8:1 bind_address_no_port +u8:1 recverr_rfc4884 +u8:1 defer_connect read_mostly tcp_sendmsg_fastopen +u8 rcv_tos +u8 convert_csum +int uc_index +int mc_index +be32 mc_addr 
+struct ip_mc_socklist* mc_list +struct inet_cork_full cork read_mostly __tcp_transmit_skb +struct local_port_range +======================== ===================== =================== =================== ====================================================================================================== diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4ee970f3bad6e..ee0154337a9c5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1199,12 +1199,12 @@ static struct sock *chtls_recv_sock(struct sock *lsk, struct ipv6_pinfo *newnp = inet6_sk(newsk); struct ipv6_pinfo *np = inet6_sk(lsk); - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + newinet->pinet6 = &newtcp6sk->inet6; + newinet->ipv6_fl_list = NULL; memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newsk->sk_v6_daddr = treq->ir_v6_rmt_addr; newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr; inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr; - newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newsk->sk_bound_dev_if = treq->ir_iif; newinet->inet_opt = NULL; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 43b7bb8287388..7294e4e89b797 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -271,7 +271,6 @@ struct ipv6_pinfo { struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; - struct ipv6_fl_socklist __rcu *ipv6_fl_list; }; /* We currently use available bits from inet_sk(sk)->inet_flags, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1086256549faa..b6ec08072533a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -214,6 +214,7 @@ struct inet_sock { struct sock sk; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6; + struct ipv6_fl_socklist __rcu *ipv6_fl_list; #endif /* Socket demultiplex comparisons on incoming packets. 
*/ #define inet_daddr sk.__sk_common.skc_daddr diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a3ff575798dda..60d0be47a9f31 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -66,8 +66,8 @@ EXPORT_SYMBOL(ipv6_flowlabel_exclusive); fl != NULL; \ fl = rcu_dereference(fl->next)) -#define for_each_sk_fl_rcu(np, sfl) \ - for (sfl = rcu_dereference(np->ipv6_fl_list); \ +#define for_each_sk_fl_rcu(sk, sfl) \ + for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \ sfl != NULL; \ sfl = rcu_dereference(sfl->next)) @@ -262,12 +262,11 @@ static struct ip6_flowlabel *fl_intern(struct net *net, struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) { struct ipv6_fl_socklist *sfl; - struct ipv6_pinfo *np = inet6_sk(sk); label &= IPV6_FLOWLABEL_MASK; rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label && atomic_inc_not_zero(&fl->users)) { @@ -283,16 +282,16 @@ EXPORT_SYMBOL_GPL(__fl6_sock_lookup); void fl6_free_socklist(struct sock *sk) { - struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipv6_fl_socklist *sfl; - if (!rcu_access_pointer(np->ipv6_fl_list)) + if (!rcu_access_pointer(inet->ipv6_fl_list)) return; spin_lock_bh(&ip6_sk_fl_lock); - while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list, lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { - np->ipv6_fl_list = sfl->next; + inet->ipv6_fl_list = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); @@ -470,16 +469,15 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, static int mem_check(struct sock *sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_fl_socklist *sfl; int room = FL_MAX_SIZE - atomic_read(&fl_size); + struct ipv6_fl_socklist *sfl; int count = 0; if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; rcu_read_lock(); - 
for_each_sk_fl_rcu(np, sfl) + for_each_sk_fl_rcu(sk, sfl) count++; rcu_read_unlock(); @@ -492,13 +490,15 @@ static int mem_check(struct sock *sk) return 0; } -static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, - struct ip6_flowlabel *fl) +static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl, + struct ip6_flowlabel *fl) { + struct inet_sock *inet = inet_sk(sk); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; - sfl->next = np->ipv6_fl_list; - rcu_assign_pointer(np->ipv6_fl_list, sfl); + sfl->next = inet->ipv6_fl_list; + rcu_assign_pointer(inet->ipv6_fl_list, sfl); spin_unlock_bh(&ip6_sk_fl_lock); } @@ -520,7 +520,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { spin_lock_bh(&ip6_fl_lock); freq->flr_label = sfl->fl->label; @@ -559,7 +559,7 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) } spin_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; + for (sflp = &inet_sk(sk)->ipv6_fl_list; (sfl = socklist_dereference(*sflp)) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq->flr_label) @@ -579,13 +579,12 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6_fl_socklist *sfl; int err; rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { if (sfl->fl->label == freq->flr_label) { err = fl6_renew(sfl->fl, freq->flr_linger, freq->flr_expires); @@ -614,7 +613,6 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, { struct ipv6_fl_socklist *sfl, *sfl1 = NULL; struct ip6_flowlabel *fl, *fl1 = NULL; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = 
sock_net(sk); int err; @@ -645,7 +643,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (freq->flr_label) { err = -EEXIST; rcu_read_lock(); - for_each_sk_fl_rcu(np, sfl) { + for_each_sk_fl_rcu(sk, sfl) { if (sfl->fl->label == freq->flr_label) { if (freq->flr_flags & IPV6_FL_F_EXCL) { rcu_read_unlock(); @@ -682,7 +680,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; - fl_link(np, sfl1, fl1); + fl_link(sk, sfl1, fl1); fl_free(fl); return 0; @@ -716,7 +714,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, } } - fl_link(np, sfl1, fl); + fl_link(sk, sfl1, fl); return 0; done: fl_free(fl); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 59c4977a811a0..6197dd4e6261c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1386,7 +1386,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (!newsk) return NULL; - inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); + newinet = inet_sk(newsk); + newinet->pinet6 = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); @@ -1405,7 +1407,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet_iif(skb); @@ -1453,10 +1454,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newsk->sk_gso_type = SKB_GSO_TCPV6; inet6_sk_rx_dst_set(newsk, skb); - inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); + newinet = inet_sk(newsk); + newinet->pinet6 = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; + newinet->inet_opt = NULL; newtp = tcp_sk(newsk); - newinet = inet_sk(newsk); newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, 
sizeof(struct ipv6_pinfo)); @@ -1469,10 +1472,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ - newinet->inet_opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 568ff8797c393..d725b21587588 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -782,9 +782,10 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sctp_association *asoc, bool kern) { - struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; + struct inet_sock *newinet; + struct sock *newsk; newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) @@ -796,7 +797,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, sock_reset_flag(sk, SOCK_ZAPPED); newsctp6sk = (struct sctp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newsctp6sk->inet6; + newinet = inet_sk(newsk); + newinet->pinet6 = &newsctp6sk->inet6; + newinet->ipv6_fl_list = NULL; sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; @@ -805,7 +808,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; sctp_v6_copy_ip_options(sk, newsk); From 9c4609225ec1cb551006d6a03c7c4ad8cb5584c0 Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Wed, 15 Oct 2025 10:02:34 +0800 Subject: [PATCH 103/867] rculist: Add hlist_nulls_replace_rcu() and hlist_nulls_replace_init_rcu() Add two functions to atomically replace RCU-protected hlist_nulls entries. 
Keep using WRITE_ONCE() to assign values to ->next and ->pprev, as mentioned in the patch below: commit efd04f8a8b45 ("rcu: Use WRITE_ONCE() for assignments to ->next for rculist_nulls") commit 860c8802ace1 ("rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls") Reviewed-by: Kuniyuki Iwashima Reviewed-by: Frederic Weisbecker Reviewed-by: Eric Dumazet Signed-off-by: Xuanqiang Luo Link: https://patch.msgid.link/20251015020236.431822-2-xuanqiang.luo@linux.dev Signed-off-by: Jakub Kicinski --- include/linux/rculist_nulls.h | 59 +++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 89186c499dd47..c26cb83ca0711 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -52,6 +52,13 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) #define hlist_nulls_next_rcu(node) \ (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) +/** + * hlist_nulls_pprev_rcu - returns the dereferenced pprev of @node. + * @node: element of the list. + */ +#define hlist_nulls_pprev_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)(node)->pprev)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -152,6 +159,58 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); } +/** + * hlist_nulls_replace_rcu - replace an old entry by a new one + * @old: the element to be replaced + * @new: the new element to insert + * + * Description: + * Replace the old entry with the new one in a RCU-protected hlist_nulls, while + * permitting racing traversals. 
+ * + * The caller must take whatever precautions are necessary (such as holding + * appropriate locks) to avoid racing with another list-mutation primitive, such + * as hlist_nulls_add_head_rcu() or hlist_nulls_del_rcu(), running on this same + * list. However, it is perfectly legal to run concurrently with the _rcu + * list-traversal primitives, such as hlist_nulls_for_each_entry_rcu(). + */ +static inline void hlist_nulls_replace_rcu(struct hlist_nulls_node *old, + struct hlist_nulls_node *new) +{ + struct hlist_nulls_node *next = old->next; + + WRITE_ONCE(new->next, next); + WRITE_ONCE(new->pprev, old->pprev); + rcu_assign_pointer(hlist_nulls_pprev_rcu(new), new); + if (!is_a_nulls(next)) + WRITE_ONCE(next->pprev, &new->next); +} + +/** + * hlist_nulls_replace_init_rcu - replace an old entry by a new one and + * initialize the old + * @old: the element to be replaced + * @new: the new element to insert + * + * Description: + * Replace the old entry with the new one in a RCU-protected hlist_nulls, while + * permitting racing traversals, and reinitialize the old entry. + * + * Note: @old must be hashed. + * + * The caller must take whatever precautions are necessary (such as holding + * appropriate locks) to avoid racing with another list-mutation primitive, such + * as hlist_nulls_add_head_rcu() or hlist_nulls_del_rcu(), running on this same + * list. However, it is perfectly legal to run concurrently with the _rcu + * list-traversal primitives, such as hlist_nulls_for_each_entry_rcu(). + */ +static inline void hlist_nulls_replace_init_rcu(struct hlist_nulls_node *old, + struct hlist_nulls_node *new) +{ + hlist_nulls_replace_rcu(old, new); + WRITE_ONCE(old->pprev, NULL); +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. 
From 1532ed0d0753c83e72595f785f82b48c28bbe5dc Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Wed, 15 Oct 2025 10:02:35 +0800 Subject: [PATCH 104/867] inet: Avoid ehash lookup race in inet_ehash_insert() Since ehash lookups are lockless, if one CPU performs a lookup while another concurrently deletes and inserts (removing reqsk and inserting sk), the lookup may fail to find the socket and an RST may be sent. The call trace map is drawn as follows: CPU 0 CPU 1 ----- ----- inet_ehash_insert() spin_lock() sk_nulls_del_node_init_rcu(osk) __inet_lookup_established() (lookup failed) __sk_nulls_add_node_rcu(sk, list) spin_unlock() As both deletion and insertion operate on the same ehash chain, this patch introduces a new sk_nulls_replace_node_init_rcu() helper function to implement atomic replacement. Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") Reviewed-by: Kuniyuki Iwashima Reviewed-by: Jiayuan Chen Signed-off-by: Xuanqiang Luo Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251015020236.431822-3-xuanqiang.luo@linux.dev Signed-off-by: Jakub Kicinski --- include/net/sock.h | 13 +++++++++++++ net/ipv4/inet_hashtables.c | 8 ++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 30ac2eb4ef9bf..335d0da82d79e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -856,6 +856,19 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) return rc; } +static inline bool sk_nulls_replace_node_init_rcu(struct sock *old, + struct sock *new) +{ + if (sk_hashed(old)) { + hlist_nulls_replace_init_rcu(&old->sk_nulls_node, + &new->sk_nulls_node); + __sock_put(old); + return true; + } + + return false; +} + static inline void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index b7024e3d9ac3d..f5826ec4bcaa8 100644 --- a/net/ipv4/inet_hashtables.c +++
b/net/ipv4/inet_hashtables.c @@ -720,8 +720,11 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); - ret = sk_nulls_del_node_init_rcu(osk); - } else if (found_dup_sk) { + ret = sk_nulls_replace_node_init_rcu(osk, sk); + goto unlock; + } + + if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; @@ -730,6 +733,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); +unlock: spin_unlock(lock); return ret; From b8ec80b130211e7bf076ef72365952979d5f7a72 Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Wed, 15 Oct 2025 10:02:36 +0800 Subject: [PATCH 105/867] inet: Avoid ehash lookup race in inet_twsk_hashdance_schedule() Since ehash lookups are lockless, if another CPU is converting sk to tw concurrently, fetching the newly inserted tw with tw->tw_refcnt == 0 causes a lookup failure. The call trace map is drawn as follows: CPU 0 CPU 1 ----- ----- inet_twsk_hashdance_schedule() spin_lock() inet_twsk_add_node_rcu(tw, ...) __inet_lookup_established() (find tw, failure due to tw_refcnt = 0) __sk_nulls_del_node_init_rcu(sk) refcount_set(&tw->tw_refcnt, 3) spin_unlock() By replacing sk with tw atomically via hlist_nulls_replace_init_rcu() after setting tw_refcnt, we ensure that tw is either fully initialized or not visible to other CPUs, eliminating the race. It's worth noting that we held lock_sock() before the replacement, so there's no need to check if sk is hashed. Thanks to Kuniyuki Iwashima!
Fixes: 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls") Reviewed-by: Kuniyuki Iwashima Reviewed-by: Jiayuan Chen Signed-off-by: Xuanqiang Luo Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251015020236.431822-4-xuanqiang.luo@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv4/inet_timewait_sock.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c96d61d08854f..d4c781a0667fe 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -88,12 +88,6 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); -static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, - struct hlist_nulls_head *list) -{ - hlist_nulls_add_head_rcu(&tw->tw_node, list); -} - static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, false); @@ -113,13 +107,12 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead, *bhead2; - /* Step 1: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet->num != 0 MUST be bound in - binding cache, even if it is closed. + /* Put TW into bind hash. Original socket stays there too. + * Note, that any socket with inet->num != 0 MUST be bound in + * binding cache, even if it is closed. 
*/ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; @@ -141,19 +134,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, spin_lock(lock); - /* Step 2: Hash TW into tcp ehash chain */ - inet_twsk_add_node_rcu(tw, &ehead->chain); - - /* Step 3: Remove SK from hash chain */ - if (__sk_nulls_del_node_init_rcu(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - - - /* Ensure above writes are committed into memory before updating the - * refcount. - * Provides ordering vs later refcount_inc(). - */ - smp_wmb(); /* tw_refcnt is set to 3 because we have : * - one reference for bhash chain. * - one reference for ehash chain. @@ -163,6 +143,15 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, */ refcount_set(&tw->tw_refcnt, 3); + /* Ensure tw_refcnt has been set before tw is published. + * smp_wmb() provides the necessary memory barrier to enforce this + * ordering. + */ + smp_wmb(); + + hlist_nulls_replace_init_rcu(&sk->sk_nulls_node, &tw->tw_node); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + inet_twsk_schedule(tw, timeo); spin_unlock(lock); From 2af8ff1e472e9862983303890e98d45f40863351 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Oct 2025 13:51:47 +0200 Subject: [PATCH 106/867] net: Kconfig: discourage drop_monitor enablement Quoting Eric Dumazet: "I do not understand the fascination with net/core/drop_monitor.c [..] misses all the features, flexibility, scalability that 'perf', eBPF tracing, bpftrace, .... have today." Reword DROP_MONITOR kconfig help text to clearly state that it's not related to perf-based drop monitoring and that it's safe to disable this unless support for the older netlink-based tools is needed.
Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251016115147.18503-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/Kconfig b/net/Kconfig index 1d3f757d4b07e..62266eaf0e95a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -400,15 +400,15 @@ config NET_PKTGEN module will be called pktgen. config NET_DROP_MONITOR - tristate "Network packet drop alerting service" + tristate "Legacy network packet drop alerting service" depends on INET && TRACEPOINTS help This feature provides an alerting service to userspace in the event that packets are discarded in the network stack. Alerts are broadcast via netlink socket to any listening user space - process. If you don't need network drop alerts, or if you are ok - just checking the various proc files and other utilities for - drop statistics, say N here. + process. This feature is NOT related to "perf" based drop monitoring. + Say N here unless you need to support older userspace tools like + "dropwatch". endmenu # Network testing From 3dc2a17efc5f9d0a51caa9f63600e6f02e0feba4 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 16 Oct 2025 21:25:28 +0200 Subject: [PATCH 107/867] r8169: reconfigure rx unconditionally before chip reset when resuming There's a good chance that more chip versions suffer from the same hw issue. So let's reconfigure rx unconditionally before the chip reset when resuming. This shouldn't have any side effect on unaffected chip versions. 
Reviewed-by: Simon Horman Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/a5c2e2d2-226f-4896-b8f6-45e2d91f0e24@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index d18734fe12e42..2a4d9b5488103 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4995,9 +4995,7 @@ static int rtl8169_resume(struct device *device) clk_prepare_enable(tp->clk); /* Some chip versions may truncate packets without this initialization */ - if (tp->mac_version == RTL_GIGA_MAC_VER_37 || - tp->mac_version == RTL_GIGA_MAC_VER_46) - rtl_init_rxcfg(tp); + rtl_init_rxcfg(tp); return rtl8169_runtime_resume(device); } From f578ff4c53889cb9bc15a5b2acc7274d46bb38cb Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 16 Oct 2025 18:25:37 +0000 Subject: [PATCH 108/867] selftests/net: io_uring: fix unknown errnum values The io_uring functions return negative error values, but error() expects these to be positive to properly match them to an errno string. Fix this to make sure the correct error descriptions are displayed upon failure. 
Signed-off-by: Carlos Llamas Link: https://patch.msgid.link/20251016182538.3790567-1-cmllamas@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/io_uring_zerocopy_tx.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 76e604e4810e1..7bfeeb1337054 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_queue_init(512, &ring, 0); if (ret) - error(1, ret, "io_uring: queue init"); + error(1, -ret, "io_uring: queue init"); iov.iov_base = payload; iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) - error(1, ret, "io_uring: buffer registration"); + error(1, -ret, "io_uring: buffer registration"); tstop = gettimeofday_ms() + cfg_runtime_ms; do { @@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_submit(&ring); if (ret != cfg_nr_reqs) - error(1, ret, "submit"); + error(1, -ret, "submit"); if (cfg_cork) do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); for (i = 0; i < cfg_nr_reqs; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->user_data != NONZC_TAG && cqe->user_data != ZC_TAG) - error(1, -EINVAL, "invalid cqe->user_data"); + error(1, EINVAL, "invalid cqe->user_data"); if (cqe->flags & IORING_CQE_F_NOTIF) { if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (compl_cqes <= 0) - error(1, -EINVAL, "notification mismatch"); + error(1, EINVAL, "notification mismatch"); compl_cqes--; i--; io_uring_cqe_seen(&ring); @@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol) } if (cqe->flags & IORING_CQE_F_MORE) { if (cqe->user_data != 
ZC_TAG) - error(1, cqe->res, "unexpected F_MORE"); + error(1, -cqe->res, "unexpected F_MORE"); compl_cqes++; } if (cqe->res >= 0) { packets++; bytes += cqe->res; } else if (cqe->res != -EAGAIN) { - error(1, cqe->res, "send failed"); + error(1, -cqe->res, "send failed"); } io_uring_cqe_seen(&ring); } @@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol) while (compl_cqes) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (!(cqe->flags & IORING_CQE_F_NOTIF)) - error(1, -EINVAL, "missing notif flag"); + error(1, EINVAL, "missing notif flag"); io_uring_cqe_seen(&ring); compl_cqes--; From 37a183d3b7cdb873e7f5f9daef1ad6d8f7c95fb7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 14 Oct 2025 14:58:36 -0700 Subject: [PATCH 109/867] tcp: Convert tcp-md5 to use MD5 library instead of crypto_ahash Make tcp-md5 use the MD5 library API (added in 6.18) instead of the crypto_ahash API. This is much simpler and also more efficient: - The library API just operates on struct md5_ctx. Just allocate this struct on the stack instead of using a pool of pre-allocated crypto_ahash and ahash_request objects. - The library API accepts standard pointers and doesn't require scatterlists. So, for hashing the headers just use an on-stack buffer instead of a pool of pre-allocated kmalloc'ed scratch buffers. - The library API never fails. Therefore, checking for MD5 hashing errors is no longer necessary. Update tcp_v4_md5_hash_skb(), tcp_v6_md5_hash_skb(), tcp_v4_md5_hash_hdr(), tcp_v6_md5_hash_hdr(), tcp_md5_hash_key(), tcp_sock_af_ops::calc_md5_hash, and tcp_request_sock_ops::calc_md5_hash to return void instead of int. - The library API provides direct access to the MD5 code, eliminating unnecessary overhead such as indirect function calls and scatterlist management. 
Microbenchmarks of tcp_v4_md5_hash_skb() on x86_64 show a speedup from 7518 to 7041 cycles (6% fewer) with skb->len == 1440, or from 1020 to 678 cycles (33% fewer) with skb->len == 140. Since tcp_sigpool_hash_skb_data() can no longer be used, add a function tcp_md5_hash_skb_data() which is specialized to MD5. Of course, to the extent that this duplicates any code, it's well worth it. To preserve the existing behavior of TCP-MD5 support being disabled when the kernel is booted with "fips=1", make tcp_md5_do_add() check fips_enabled itself. Previously it relied on the error from crypto_alloc_ahash("md5") being bubbled up. I don't know for sure that this is actually needed, but this preserves the existing behavior. Tested with bidirectional TCP-MD5, both IPv4 and IPv6, between a kernel that includes this commit and a kernel that doesn't include this commit. (Side note: please don't use TCP-MD5! It's cryptographically weak. But as long as Linux supports it, it might as well be implemented properly.) 
Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20251014215836.115616-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 26 +++----- net/ipv4/Kconfig | 4 +- net/ipv4/tcp.c | 73 +++++++++------------ net/ipv4/tcp_ipv4.c | 137 +++++++++++++-------------------------- net/ipv4/tcp_minisocks.c | 2 - net/ipv6/tcp_ipv6.c | 119 +++++++++++----------------------- 6 files changed, 121 insertions(+), 240 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e547138f4fb7..67fdd2523d929 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1898,13 +1898,6 @@ struct tcp6_pseudohdr { __be32 protocol; /* including padding */ }; -union tcp_md5sum_block { - struct tcp4_pseudohdr ip4; -#if IS_ENABLED(CONFIG_IPV6) - struct tcp6_pseudohdr ip6; -#endif -}; - /* * struct tcp_sigpool - per-CPU pool of ahash_requests * @scratch: per-CPU temporary area, that can be used between @@ -1939,8 +1932,8 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); void tcp_sigpool_end(struct tcp_sigpool *c); size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ -int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, - const struct sock *sk, const struct sk_buff *skb); +void tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags, const u8 *newkey, u8 newkeylen); @@ -1999,13 +1992,10 @@ static inline void tcp_md5_destruct_sock(struct sock *sk) } #endif -int tcp_md5_alloc_sigpool(void); -void tcp_md5_release_sigpool(void); -void tcp_md5_add_sigpool(void); -extern int tcp_md5_sigpool_id; - -int tcp_md5_hash_key(struct tcp_sigpool *hp, - const struct tcp_md5sig_key *key); +struct md5_ctx; +void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, + unsigned int header_len); +void 
tcp_md5_hash_key(struct md5_ctx *ctx, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, @@ -2355,7 +2345,7 @@ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (const struct sock *sk, const struct sock *addr_sk); - int (*calc_md5_hash)(char *location, + void (*calc_md5_hash)(char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, const struct sk_buff *skb); @@ -2383,7 +2373,7 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk, const struct sock *addr_sk); - int (*calc_md5_hash) (char *location, + void (*calc_md5_hash) (char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, const struct sk_buff *skb); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 12850a277251d..b71c22475c515 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -760,9 +760,7 @@ config TCP_AO config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" - select CRYPTO - select CRYPTO_MD5 - select TCP_SIGPOOL + select CRYPTO_LIB_MD5 help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) 
use is to protect BGP sessions between core routers diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4d720aa09a4c6..0ccc5405e7408 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -243,7 +243,7 @@ #define pr_fmt(fmt) "TCP: " fmt -#include +#include #include #include #include @@ -253,7 +253,6 @@ #include #include #include -#include #include #include #include @@ -425,7 +424,6 @@ void tcp_md5_destruct_sock(struct sock *sk) tcp_clear_md5_list(sk); kfree(rcu_replace_pointer(tp->md5sig_info, NULL, 1)); static_branch_slow_dec_deferred(&tcp_md5_needed); - tcp_md5_release_sigpool(); } } EXPORT_IPV6_MOD_GPL(tcp_md5_destruct_sock); @@ -4838,52 +4836,45 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, EXPORT_IPV6_MOD(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG -int tcp_md5_sigpool_id = -1; -EXPORT_IPV6_MOD_GPL(tcp_md5_sigpool_id); - -int tcp_md5_alloc_sigpool(void) +void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, + unsigned int header_len) { - size_t scratch_size; - int ret; + const unsigned int head_data_len = skb_headlen(skb) > header_len ? + skb_headlen(skb) - header_len : 0; + const struct skb_shared_info *shi = skb_shinfo(skb); + struct sk_buff *frag_iter; + unsigned int i; - scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr); - ret = tcp_sigpool_alloc_ahash("md5", scratch_size); - if (ret >= 0) { - /* As long as any md5 sigpool was allocated, the return - * id would stay the same. Re-write the id only for the case - * when previously all MD5 keys were deleted and this call - * allocates the first MD5 key, which may return a different - * sigpool id than was used previously. 
- */ - WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */ - return 0; - } - return ret; -} + md5_update(ctx, (const u8 *)tcp_hdr(skb) + header_len, head_data_len); -void tcp_md5_release_sigpool(void) -{ - tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id)); -} + for (i = 0; i < shi->nr_frags; ++i) { + const skb_frag_t *f = &shi->frags[i]; + u32 p_off, p_len, copied; + const void *vaddr; + struct page *p; -void tcp_md5_add_sigpool(void) -{ - tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id)); + skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), + p, p_off, p_len, copied) { + vaddr = kmap_local_page(p); + md5_update(ctx, vaddr + p_off, p_len); + kunmap_local(vaddr); + } + } + + skb_walk_frags(skb, frag_iter) + tcp_md5_hash_skb_data(ctx, frag_iter, 0); } +EXPORT_IPV6_MOD(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_sigpool *hp, - const struct tcp_md5sig_key *key) +void tcp_md5_hash_key(struct md5_ctx *ctx, + const struct tcp_md5sig_key *key) { u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ - struct scatterlist sg; - - sg_init_one(&sg, key->key, keylen); - ahash_request_set_crypt(hp->req, &sg, NULL, keylen); /* We use data_race() because tcp_md5_do_add() might change * key->key under us */ - return data_race(crypto_ahash_update(hp->req)); + data_race(({ md5_update(ctx, key->key, keylen), 0; })); } EXPORT_IPV6_MOD(tcp_md5_hash_key); @@ -4902,7 +4893,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; u8 newhash[16]; - int genhash; key = tcp_md5_do_lookup(sk, l3index, saddr, family); @@ -4917,11 +4907,10 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, * IPv4-mapped case. 
*/ if (family == AF_INET) - genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); + tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else - genhash = tp->af_specific->calc_md5_hash(newhash, key, - NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) { + tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); + if (memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b1fcf3e4e1ce0..40a76da5364a1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -86,14 +87,13 @@ #include #include -#include -#include +#include #include #ifdef CONFIG_TCP_MD5SIG -static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, - __be32 daddr, __be32 saddr, const struct tcphdr *th); +static void tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, + __be32 daddr, __be32 saddr, const struct tcphdr *th); #endif struct inet_hashinfo tcp_hashinfo; @@ -754,7 +754,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key *key = NULL; unsigned char newhash[16]; struct sock *sk1 = NULL; - int genhash; #endif u64 transmit_time = 0; struct sock *ctl_sk; @@ -840,11 +839,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, if (!key) goto out; - - genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) + tcp_v4_md5_hash_skb(newhash, key, NULL, skb); + if (memcmp(md5_hash_location, newhash, 16) != 0) goto out; - } if (key) { @@ -1425,13 +1422,13 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if 
(tcp_md5_alloc_sigpool()) - return -ENOMEM; + if (fips_enabled) { + pr_warn_once("TCP-MD5 support is disabled due to FIPS\n"); + return -EOPNOTSUPP; + } - if (tcp_md5sig_info_add(sk, GFP_KERNEL)) { - tcp_md5_release_sigpool(); + if (tcp_md5sig_info_add(sk, GFP_KERNEL)) return -ENOMEM; - } if (!static_branch_inc(&tcp_md5_needed.key)) { struct tcp_md5sig_info *md5sig; @@ -1439,7 +1436,6 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); - tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1456,12 +1452,9 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - tcp_md5_add_sigpool(); - if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) { - tcp_md5_release_sigpool(); + if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) return -ENOMEM; - } if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) { struct tcp_md5sig_info *md5sig; @@ -1470,7 +1463,6 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, net_warn_ratelimited("Too many TCP-MD5 keys in the system\n"); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); - tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1578,66 +1570,44 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v4_md5_hash_headers(struct tcp_sigpool *hp, - __be32 daddr, __be32 saddr, - const struct tcphdr *th, int nbytes) +static void tcp_v4_md5_hash_headers(struct md5_ctx *ctx, + __be32 daddr, __be32 saddr, + const struct tcphdr *th, int nbytes) { - struct tcp4_pseudohdr *bp; - struct scatterlist sg; - struct tcphdr *_th; - - bp = hp->scratch; - bp->saddr = saddr; - bp->daddr = daddr; - bp->pad = 0; - bp->protocol = IPPROTO_TCP; - bp->len = 
cpu_to_be16(nbytes); - - _th = (struct tcphdr *)(bp + 1); - memcpy(_th, th, sizeof(*th)); - _th->check = 0; + struct { + struct tcp4_pseudohdr ip; + struct tcphdr tcp; + } h; - sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->req, &sg, NULL, - sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->req); + h.ip.saddr = saddr; + h.ip.daddr = daddr; + h.ip.pad = 0; + h.ip.protocol = IPPROTO_TCP; + h.ip.len = cpu_to_be16(nbytes); + h.tcp = *th; + h.tcp.check = 0; + md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp)); } -static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, - __be32 daddr, __be32 saddr, const struct tcphdr *th) +static noinline_for_stack void +tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, + __be32 daddr, __be32 saddr, const struct tcphdr *th) { - struct tcp_sigpool hp; + struct md5_ctx ctx; - if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) - goto clear_hash_nostart; - - if (crypto_ahash_init(hp.req)) - goto clear_hash; - if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_key(&hp, key)) - goto clear_hash; - ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); - if (crypto_ahash_final(hp.req)) - goto clear_hash; - - tcp_sigpool_end(&hp); - return 0; - -clear_hash: - tcp_sigpool_end(&hp); -clear_hash_nostart: - memset(md5_hash, 0, 16); - return 1; + md5_init(&ctx); + tcp_v4_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2); + tcp_md5_hash_key(&ctx, key); + md5_final(&ctx, md5_hash); } -int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, - const struct sock *sk, - const struct sk_buff *skb) +noinline_for_stack void +tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); - struct tcp_sigpool hp; __be32 saddr, daddr; + struct md5_ctx ctx; if (sk) { /* valid for 
establish/request sockets */ saddr = sk->sk_rcv_saddr; @@ -1648,30 +1618,11 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, daddr = iph->daddr; } - if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) - goto clear_hash_nostart; - - if (crypto_ahash_init(hp.req)) - goto clear_hash; - - if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) - goto clear_hash; - if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_key(&hp, key)) - goto clear_hash; - ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); - if (crypto_ahash_final(hp.req)) - goto clear_hash; - - tcp_sigpool_end(&hp); - return 0; - -clear_hash: - tcp_sigpool_end(&hp); -clear_hash_nostart: - memset(md5_hash, 0, 16); - return 1; + md5_init(&ctx); + tcp_v4_md5_hash_headers(&ctx, daddr, saddr, th, skb->len); + tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2); + tcp_md5_hash_key(&ctx, key); + md5_final(&ctx, md5_hash); } EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2ec8c6f1cdccc..ded2cf1f60067 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -312,7 +312,6 @@ static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) return; if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) goto out_free; - tcp_md5_add_sigpool(); } return; out_free: @@ -406,7 +405,6 @@ void tcp_twsk_destructor(struct sock *sk) if (twsk->tw_md5_key) { kfree(twsk->tw_md5_key); static_branch_slow_dec_deferred(&tcp_md5_needed); - tcp_md5_release_sigpool(); } } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6197dd4e6261c..06eb90e4078e5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -67,8 +67,7 @@ #include #include -#include -#include +#include #include @@ -691,69 +690,45 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v6_md5_hash_headers(struct tcp_sigpool 
*hp, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - const struct tcphdr *th, int nbytes) +static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + const struct tcphdr *th, int nbytes) { - struct tcp6_pseudohdr *bp; - struct scatterlist sg; - struct tcphdr *_th; - - bp = hp->scratch; - /* 1. TCP pseudo-header (RFC2460) */ - bp->saddr = *saddr; - bp->daddr = *daddr; - bp->protocol = cpu_to_be32(IPPROTO_TCP); - bp->len = cpu_to_be32(nbytes); - - _th = (struct tcphdr *)(bp + 1); - memcpy(_th, th, sizeof(*th)); - _th->check = 0; - - sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->req, &sg, NULL, - sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->req); + struct { + struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */ + struct tcphdr tcp; + } h; + + h.ip.saddr = *saddr; + h.ip.daddr = *daddr; + h.ip.protocol = cpu_to_be32(IPPROTO_TCP); + h.ip.len = cpu_to_be32(nbytes); + h.tcp = *th; + h.tcp.check = 0; + md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp)); } -static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, - const struct in6_addr *daddr, struct in6_addr *saddr, - const struct tcphdr *th) +static noinline_for_stack void +tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, + const struct in6_addr *daddr, struct in6_addr *saddr, + const struct tcphdr *th) { - struct tcp_sigpool hp; - - if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) - goto clear_hash_nostart; - - if (crypto_ahash_init(hp.req)) - goto clear_hash; - if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_key(&hp, key)) - goto clear_hash; - ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); - if (crypto_ahash_final(hp.req)) - goto clear_hash; - - tcp_sigpool_end(&hp); - return 0; + struct md5_ctx ctx; -clear_hash: - tcp_sigpool_end(&hp); -clear_hash_nostart: - 
memset(md5_hash, 0, 16); - return 1; + md5_init(&ctx); + tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2); + tcp_md5_hash_key(&ctx, key); + md5_final(&ctx, md5_hash); } -static int tcp_v6_md5_hash_skb(char *md5_hash, - const struct tcp_md5sig_key *key, - const struct sock *sk, - const struct sk_buff *skb) +static noinline_for_stack void +tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); const struct in6_addr *saddr, *daddr; - struct tcp_sigpool hp; + struct md5_ctx ctx; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; @@ -764,30 +739,11 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, daddr = &ip6h->daddr; } - if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) - goto clear_hash_nostart; - - if (crypto_ahash_init(hp.req)) - goto clear_hash; - - if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) - goto clear_hash; - if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_key(&hp, key)) - goto clear_hash; - ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); - if (crypto_ahash_final(hp.req)) - goto clear_hash; - - tcp_sigpool_end(&hp); - return 0; - -clear_hash: - tcp_sigpool_end(&hp); -clear_hash_nostart: - memset(md5_hash, 0, 16); - return 1; + md5_init(&ctx); + tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len); + tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2); + tcp_md5_hash_key(&ctx, key); + md5_final(&ctx, md5_hash); } #endif @@ -1032,7 +988,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, int oif = 0; #ifdef CONFIG_TCP_MD5SIG unsigned char newhash[16]; - int genhash; struct sock *sk1 = NULL; #endif @@ -1091,8 +1046,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, goto out; key.type = TCP_KEY_MD5; - genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); - if (genhash || 
memcmp(md5_hash_location, newhash, 16) != 0) + tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); + if (memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif From e29bbd73ad7129adfeba56d28871e56e637aa7a4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:32:05 +0100 Subject: [PATCH 110/867] net: dsa: lantiq_gswip: support bridge FDB entries on the CPU port Currently, the driver takes the bridge from dsa_port_bridge_dev_get(), which only works for user ports. This is why it has to ignore FDB entries installed on the CPU port. Commit c26933639b54 ("net: dsa: request drivers to perform FDB isolation") introduced the possibility of getting the originating bridge from the passed dsa_db argument, so let's do that instead. This way, we can act on the local FDB entries coming from the bridge. Note that we do not expect FDB events for the DSA_DB_PORT database, because this driver doesn't fulfill the dsa_switch_supports_uc_filtering() requirements. So we can just return -EOPNOTSUPP and expect it will never be triggered. 
Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/ed9d847c0356f0fec81422bdad9ebdcc6a59da79.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 2169c0814a48b..91755a5972fa5 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -1140,9 +1140,9 @@ static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) } static int gswip_port_fdb(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, bool add) + struct net_device *bridge, const unsigned char *addr, + u16 vid, bool add) { - struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); struct gswip_priv *priv = ds->priv; struct gswip_pce_table_entry mac_bridge = {0,}; unsigned int max_ports = priv->hw_info->max_ports; @@ -1150,10 +1150,6 @@ static int gswip_port_fdb(struct dsa_switch *ds, int port, int i; int err; - /* Operation not supported on the CPU port, don't throw errors */ - if (!bridge) - return 0; - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { if (priv->vlans[i].bridge == bridge) { fid = priv->vlans[i].fid; @@ -1188,14 +1184,20 @@ static int gswip_port_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db) { - return gswip_port_fdb(ds, port, addr, vid, true); + if (db.type != DSA_DB_BRIDGE) + return -EOPNOTSUPP; + + return gswip_port_fdb(ds, port, db.bridge.dev, addr, vid, true); } static int gswip_port_fdb_del(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db) { - return gswip_port_fdb(ds, port, addr, vid, false); + if (db.type != DSA_DB_BRIDGE) + return -EOPNOTSUPP; + + return gswip_port_fdb(ds, port, db.bridge.dev, addr, vid, false); } static int 
gswip_port_fdb_dump(struct dsa_switch *ds, int port, From 92790e6c11a882cc8d49e05da5ed39fb5475a5ae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:32:15 +0100 Subject: [PATCH 111/867] net: dsa: lantiq_gswip: define VLAN ID 0 constant This patch adds an explicit definition for VID 0 to the Lantiq GSWIP DSA driver, clarifying its special meaning. Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/e8862239d0bb727723cf60947d2262473b46c96d.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 12 +++++++----- drivers/net/dsa/lantiq/lantiq_gswip.h | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 91755a5972fa5..9526317443a1b 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -432,7 +432,7 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) vlan_active.index = port + 1; vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; - vlan_active.key[0] = 0; /* vid */ + vlan_active.key[0] = GSWIP_VLAN_UNAWARE_PVID; vlan_active.val[0] = port + 1 /* fid */; vlan_active.valid = add; err = gswip_pce_table_entry_write(priv, &vlan_active); @@ -446,7 +446,7 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) vlan_mapping.index = port + 1; vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - vlan_mapping.val[0] = 0 /* vid */; + vlan_mapping.val[0] = GSWIP_VLAN_UNAWARE_PVID; vlan_mapping.val[1] = BIT(port) | dsa_cpu_ports(priv->ds); vlan_mapping.val[2] = 0; err = gswip_pce_table_entry_write(priv, &vlan_mapping); @@ -772,7 +772,8 @@ static int gswip_vlan_add_unaware(struct gswip_priv *priv, * entry in a free slot and prepare the VLAN mapping table entry. 
*/ if (idx == -1) { - idx = gswip_vlan_active_create(priv, bridge, -1, 0); + idx = gswip_vlan_active_create(priv, bridge, -1, + GSWIP_VLAN_UNAWARE_PVID); if (idx < 0) return idx; active_vlan_created = true; @@ -780,7 +781,7 @@ static int gswip_vlan_add_unaware(struct gswip_priv *priv, vlan_mapping.index = idx; vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; /* VLAN ID byte, maps to the VLAN ID of vlan active table */ - vlan_mapping.val[0] = 0; + vlan_mapping.val[0] = GSWIP_VLAN_UNAWARE_PVID; } else { /* Read the existing VLAN mapping entry from the switch */ vlan_mapping.index = idx; @@ -977,7 +978,8 @@ static void gswip_port_bridge_leave(struct dsa_switch *ds, int port, * specific bridges. No bridge is configured here. */ if (!br_vlan_enabled(br)) - gswip_vlan_remove(priv, br, port, 0, true, false); + gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, true, + false); } static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 2df9c8e8cfd0b..6aae1ff2f130f 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -222,6 +222,8 @@ */ #define GSWIP_MAX_PACKET_LENGTH 2400 +#define GSWIP_VLAN_UNAWARE_PVID 0 + struct gswip_pce_microcode { u16 val_3; u16 val_2; From 8f5c71e44413ac1c8dcc162cd4769645eff80590 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:32:30 +0100 Subject: [PATCH 112/867] net: dsa: lantiq_gswip: remove duplicate assignment to vlan_mapping.val[0] When idx == -1 in gswip_vlan_add(), we set vlan_mapping.val[0] = vid, even though we do the exact same thing again outside the if/else block. Remove the duplicate assignment. 
Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/039ecb48e038cea856a9a6230ad1543db2bc382d.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 9526317443a1b..e41d67ea89c52 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -848,8 +848,6 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv, vlan_mapping.index = idx; vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - /* VLAN ID byte, maps to the VLAN ID of vlan active table */ - vlan_mapping.val[0] = vid; } else { /* Read the existing VLAN mapping entry from the switch */ vlan_mapping.index = idx; @@ -862,6 +860,7 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv, } } + /* VLAN ID byte, maps to the VLAN ID of vlan active table */ vlan_mapping.val[0] = vid; /* Update the VLAN mapping entry and write it to the switch */ vlan_mapping.val[1] |= cpu_ports; From b92068755ee0359eb009ae7584145fa3bab51cce Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:32:41 +0100 Subject: [PATCH 113/867] net: dsa: lantiq_gswip: merge gswip_vlan_add_unaware() and gswip_vlan_add_aware() The two functions largely duplicate functionality. The differences consist in: - the "fid" passed to gswip_vlan_active_create(). The unaware variant always passes -1, the aware variant passes fid = priv->vlans[i].fid, where i is an index into priv->vlans[] for which priv->vlans[i].bridge is equal to the given bridge. - the "vid" is not passed to gswip_vlan_add_unaware(). It is implicitly GSWIP_VLAN_UNAWARE_PVID (zero). - The "untagged" is not passed to gswip_vlan_add_unaware(). It is implicitly true. Also, the CPU port must not be a tag member of the PVID used for VLAN-unaware bridging. 
- The "pvid" is not passed to gswip_vlan_add_unaware(). It is implicitly true. - The GSWIP_PCE_DEFPVID(port) register is written by the aware variant with an "idx", but with a hardcoded 0 by the unaware variant. Merge the two functions into a single unified function without any functional changes. Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/2be190701d4c17038ce4b8047f9fb0bdf8abdf6e.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 94 +++++---------------------- 1 file changed, 17 insertions(+), 77 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index e41d67ea89c52..6cbcb54a5ed0a 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -750,86 +750,25 @@ static int gswip_vlan_active_remove(struct gswip_priv *priv, int idx) return err; } -static int gswip_vlan_add_unaware(struct gswip_priv *priv, - struct net_device *bridge, int port) -{ - struct gswip_pce_table_entry vlan_mapping = {0,}; - unsigned int max_ports = priv->hw_info->max_ports; - bool active_vlan_created = false; - int idx = -1; - int i; - int err; - - /* Check if there is already a page for this bridge */ - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == bridge) { - idx = i; - break; - } - } - - /* If this bridge is not programmed yet, add a Active VLAN table - * entry in a free slot and prepare the VLAN mapping table entry. 
- */ - if (idx == -1) { - idx = gswip_vlan_active_create(priv, bridge, -1, - GSWIP_VLAN_UNAWARE_PVID); - if (idx < 0) - return idx; - active_vlan_created = true; - - vlan_mapping.index = idx; - vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - /* VLAN ID byte, maps to the VLAN ID of vlan active table */ - vlan_mapping.val[0] = GSWIP_VLAN_UNAWARE_PVID; - } else { - /* Read the existing VLAN mapping entry from the switch */ - vlan_mapping.index = idx; - vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - err = gswip_pce_table_entry_read(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to read VLAN mapping: %d\n", - err); - return err; - } - } - - /* Update the VLAN mapping entry and write it to the switch */ - vlan_mapping.val[1] |= dsa_cpu_ports(priv->ds); - vlan_mapping.val[1] |= BIT(port); - err = gswip_pce_table_entry_write(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); - /* In case an Active VLAN was creaetd delete it again */ - if (active_vlan_created) - gswip_vlan_active_remove(priv, idx); - return err; - } - - gswip_switch_w(priv, 0, GSWIP_PCE_DEFPVID(port)); - return 0; -} - -static int gswip_vlan_add_aware(struct gswip_priv *priv, - struct net_device *bridge, int port, - u16 vid, bool untagged, - bool pvid) +static int gswip_vlan_add(struct gswip_priv *priv, struct net_device *bridge, + int port, u16 vid, bool untagged, bool pvid, + bool vlan_aware) { struct gswip_pce_table_entry vlan_mapping = {0,}; unsigned int max_ports = priv->hw_info->max_ports; unsigned int cpu_ports = dsa_cpu_ports(priv->ds); bool active_vlan_created = false; - int idx = -1; - int fid = -1; - int i; - int err; + int fid = -1, idx = -1; + int i, err; /* Check if there is already a page for this bridge */ for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { if (priv->vlans[i].bridge == bridge) { - if (fid != -1 && fid != priv->vlans[i].fid) - dev_err(priv->dev, "one bridge with multiple flow ids\n"); - fid = 
priv->vlans[i].fid; + if (vlan_aware) { + if (fid != -1 && fid != priv->vlans[i].fid) + dev_err(priv->dev, "one bridge with multiple flow ids\n"); + fid = priv->vlans[i].fid; + } if (priv->vlans[i].vid == vid) { idx = i; break; @@ -864,8 +803,9 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv, vlan_mapping.val[0] = vid; /* Update the VLAN mapping entry and write it to the switch */ vlan_mapping.val[1] |= cpu_ports; - vlan_mapping.val[2] |= cpu_ports; vlan_mapping.val[1] |= BIT(port); + if (vlan_aware) + vlan_mapping.val[2] |= cpu_ports; if (untagged) vlan_mapping.val[2] &= ~BIT(port); else @@ -879,8 +819,7 @@ static int gswip_vlan_add_aware(struct gswip_priv *priv, return err; } - if (pvid) - gswip_switch_w(priv, idx, GSWIP_PCE_DEFPVID(port)); + gswip_switch_w(priv, vlan_aware ? idx : 0, GSWIP_PCE_DEFPVID(port)); return 0; } @@ -955,7 +894,8 @@ static int gswip_port_bridge_join(struct dsa_switch *ds, int port, * specific bridges. No bridge is configured here. */ if (!br_vlan_enabled(br)) { - err = gswip_vlan_add_unaware(priv, br, port); + err = gswip_vlan_add(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, + true, true, false); if (err) return err; priv->port_vlan_filter &= ~BIT(port); @@ -1049,8 +989,8 @@ static int gswip_port_vlan_add(struct dsa_switch *ds, int port, if (dsa_is_cpu_port(ds, port)) return 0; - return gswip_vlan_add_aware(priv, bridge, port, vlan->vid, - untagged, pvid); + return gswip_vlan_add(priv, bridge, port, vlan->vid, untagged, pvid, + true); } static int gswip_port_vlan_del(struct dsa_switch *ds, int port, From 21c3237c60c3694ba6afc582c6b0568905d1dca8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:32:50 +0100 Subject: [PATCH 114/867] net: dsa: lantiq_gswip: remove legacy configure_vlan_while_not_filtering option This driver doesn't support dynamic VLAN filtering changes, for simplicity. It expects that on a port, either gswip_vlan_add_unaware() or gswip_vlan_add_aware() is called, but not both. 
When !br_vlan_enabled(), the configure_vlan_while_not_filtering = false option is exactly what will prevent calls to gswip_port_vlan_add() from being issued by DSA. In fact, at the time these features were submitted: https://patchwork.ozlabs.org/project/netdev/patch/20190501204506.21579-3-hauke@hauke-m.de/ "configure_vlan_while_not_filtering = false" did not even have a name, it was implicit behaviour. It only became legacy in commit 54a0ed0df496 ("net: dsa: provide an option for drivers to always receive bridge VLANs"). Section "Bridge VLAN filtering" of Documentation/networking/switchdev.rst describes the exact set of rules. Notably, the PVID of the port must follow the VLAN awareness state of the bridge port. A VLAN-unaware bridge port should not respond to the addition of a bridge VLAN with the PVID flag. In fact, the pvid_change() test in tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh tests exactly this. The lantiq_gswip driver indeed does not respond to the addition of PVID VLANs while VLAN-unaware in the way described above, but only because of configure_vlan_while_not_filtering. Our purpose here is to get rid of configure_vlan_while_not_filtering, so we must add more complex logic which follows the VLAN awareness state and walks through the Active VLAN table entries, to find the index of the PVID register that should be committed to hardware on each port. As a side-effect of now having a proper implementation to assign the PVID, all the "VLAN upper: ..." tests of the local_termination.sh self-tests which would previously all FAIL now all PASS (or XFAIL, but that's ok). 
Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Tested-by: Daniel Golle Link: https://patch.msgid.link/47dab8a8b69ebb92624b9795b723114475d3fe4e.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 49 +++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 6cbcb54a5ed0a..30cff623bec0c 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -547,6 +547,45 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv) return 0; } +static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) +{ + struct dsa_port *dp = dsa_to_port(priv->ds, port); + struct net_device *br = dsa_port_bridge_dev_get(dp); + int idx; + + if (!dsa_port_is_user(dp)) + return; + + if (br) { + u16 pvid = GSWIP_VLAN_UNAWARE_PVID; + + if (br_vlan_enabled(br)) + br_vlan_get_pvid(br, &pvid); + + /* VLAN-aware bridge ports with no PVID will use Active VLAN + * index 0. The expectation is that this drops all untagged and + * VID-0 tagged ingress traffic. + */ + idx = 0; + for (int i = priv->hw_info->max_ports; + i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == br && + priv->vlans[i].vid == pvid) { + idx = i; + break; + } + } + } else { + /* The Active VLAN table index as configured by + * gswip_add_single_port_br() + */ + idx = port + 1; + } + + /* GSWIP 2.2 (GRX300) and later program here the VID directly. 
*/ + gswip_switch_w(priv, idx, GSWIP_PCE_DEFPVID(port)); +} + static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack) @@ -581,6 +620,8 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, GSWIP_PCE_PCTRL_0p(port)); } + gswip_port_commit_pvid(priv, port); + return 0; } @@ -677,8 +718,6 @@ static int gswip_setup(struct dsa_switch *ds) ds->mtu_enforcement_ingress = true; - ds->configure_vlan_while_not_filtering = false; - return 0; } @@ -819,7 +858,7 @@ static int gswip_vlan_add(struct gswip_priv *priv, struct net_device *bridge, return err; } - gswip_switch_w(priv, vlan_aware ? idx : 0, GSWIP_PCE_DEFPVID(port)); + gswip_port_commit_pvid(priv, port); return 0; } @@ -874,9 +913,7 @@ static int gswip_vlan_remove(struct gswip_priv *priv, } } - /* GSWIP 2.2 (GRX300) and later program here the VID directly. */ - if (pvid) - gswip_switch_w(priv, 0, GSWIP_PCE_DEFPVID(port)); + gswip_port_commit_pvid(priv, port); return 0; } From ab3ce58559d6227a8a54dc78650f5763768a1cfc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:32:58 +0100 Subject: [PATCH 115/867] net: dsa: lantiq_gswip: permit dynamic changes to VLAN filtering state The driver should now tolerate these changes, now that the PVID is automatically recalculated on a VLAN awareness state change. The VLAN-unaware PVID must be installed to hardware even if the joined bridge is currently VLAN-aware. Otherwise, when the bridge VLAN filtering state dynamically changes to VLAN-unaware later, this PVID will be missing. 
Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c58759074fb699581336dc2c2c6bf106257b134e.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 38 +++++++++------------------ drivers/net/dsa/lantiq/lantiq_gswip.h | 1 - 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 30cff623bec0c..58fdd54094d65 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -590,16 +590,8 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack) { - struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); struct gswip_priv *priv = ds->priv; - /* Do not allow changing the VLAN filtering options while in bridge */ - if (bridge && !!(priv->port_vlan_filter & BIT(port)) != vlan_filtering) { - NL_SET_ERR_MSG_MOD(extack, - "Dynamic toggling of vlan_filtering not supported"); - return -EIO; - } - if (vlan_filtering) { /* Use tag based VLAN */ gswip_switch_mask(priv, @@ -927,18 +919,15 @@ static int gswip_port_bridge_join(struct dsa_switch *ds, int port, struct gswip_priv *priv = ds->priv; int err; - /* When the bridge uses VLAN filtering we have to configure VLAN - * specific bridges. No bridge is configured here. + /* Set up the VLAN for VLAN-unaware bridging for this port, and remove + * it from the "single-port bridge" through which it was operating as + * standalone. 
*/ - if (!br_vlan_enabled(br)) { - err = gswip_vlan_add(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, - true, true, false); - if (err) - return err; - priv->port_vlan_filter &= ~BIT(port); - } else { - priv->port_vlan_filter |= BIT(port); - } + err = gswip_vlan_add(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, + true, true, false); + if (err) + return err; + return gswip_add_single_port_br(priv, port, false); } @@ -948,14 +937,11 @@ static void gswip_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br = bridge.dev; struct gswip_priv *priv = ds->priv; - gswip_add_single_port_br(priv, port, true); - - /* When the bridge uses VLAN filtering we have to configure VLAN - * specific bridges. No bridge is configured here. + /* Add the port back to the "single-port bridge", and remove it from + * the VLAN-unaware PVID created for this bridge. */ - if (!br_vlan_enabled(br)) - gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, true, - false); + gswip_add_single_port_br(priv, port, true); + gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, true, false); } static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 6aae1ff2f130f..4590a1a7dbd9b 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -270,7 +270,6 @@ struct gswip_priv { struct gswip_vlan vlans[64]; int num_gphy_fw; struct gswip_gphy_fw *gphy_fw; - u32 port_vlan_filter; struct mutex pce_table_lock; u16 version; }; From 96a91e6eeb4d7881454071ecd1443f025cc21c3b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:33:25 +0100 Subject: [PATCH 116/867] net: dsa: lantiq_gswip: disallow changes to privately set up VID 0 User space can force the altering of VID 0 as it was privately set up by this driver. For example, when the port joins a VLAN-aware bridge, dsa_user_manage_vlan_filtering() will set NETIF_F_HW_VLAN_CTAG_FILTER. 
If the port is subsequently brought up and CONFIG_VLAN_8021Q is enabled, the vlan_vid0_add() function will want to make sure we are capable of accepting packets tagged with VID 0. Generally, DSA/switchdev drivers want to suppress that bit of help from the 8021q layer, and handle VID 0 filters themselves. The 8021q layer might actually even be detrimental, because VLANs added through vlan_vid_add() pass through dsa_user_vlan_rx_add_vid(), which is documented as this: /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, so it will force VID 0 to be reconfigured as egress-tagged, non-PVID. Whereas the driver configures it as PVID and egress-untagged, the exact opposite. Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/9f68340c34b5312c3b8c6c7ecf3cfce574a3f65d.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 58fdd54094d65..26e963840f3b8 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -1000,6 +1000,9 @@ static int gswip_port_vlan_add(struct dsa_switch *ds, int port, bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; int err; + if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) + return 0; + err = gswip_port_vlan_prepare(ds, port, vlan, extack); if (err) return err; @@ -1023,6 +1026,9 @@ static int gswip_port_vlan_del(struct dsa_switch *ds, int port, struct gswip_priv *priv = ds->priv; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) + return 0; + /* We have to receive all packets on the CPU port and should not * do any VLAN filtering here. 
This is also called with bridge * NULL and then we do not know for which bridge to configure From 7ed1965f10100a0928e8d88b205273240d597d95 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:33:33 +0100 Subject: [PATCH 117/867] net: dsa: lantiq_gswip: remove vlan_aware and pvid arguments from gswip_vlan_remove() "bool pvid" is unused since commit "net: dsa: lantiq_gswip: remove legacy configure_vlan_while_not_filtering option". "bool vlan_aware" shouldn't have a role in finding the bridge VLAN. It should be identified by VID regardless of VLAN-aware or VLAN-unaware. The driver sets up VID 0 for the VLAN-unaware PVID. Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c63f89ca19269ef6c8bf00a62cacc739164b4441.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 26e963840f3b8..d9a7a004f9eb5 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -857,7 +857,7 @@ static int gswip_vlan_add(struct gswip_priv *priv, struct net_device *bridge, static int gswip_vlan_remove(struct gswip_priv *priv, struct net_device *bridge, int port, - u16 vid, bool pvid, bool vlan_aware) + u16 vid) { struct gswip_pce_table_entry vlan_mapping = {0,}; unsigned int max_ports = priv->hw_info->max_ports; @@ -868,7 +868,7 @@ static int gswip_vlan_remove(struct gswip_priv *priv, /* Check if there is already a page for this bridge */ for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { if (priv->vlans[i].bridge == bridge && - (!vlan_aware || priv->vlans[i].vid == vid)) { + priv->vlans[i].vid == vid) { idx = i; break; } @@ -941,7 +941,7 @@ static void gswip_port_bridge_leave(struct dsa_switch *ds, int port, * the VLAN-unaware PVID created for this bridge. 
*/ gswip_add_single_port_br(priv, port, true); - gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, true, false); + gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID); } static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, @@ -1024,7 +1024,6 @@ static int gswip_port_vlan_del(struct dsa_switch *ds, int port, { struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); struct gswip_priv *priv = ds->priv; - bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) return 0; @@ -1037,7 +1036,7 @@ static int gswip_port_vlan_del(struct dsa_switch *ds, int port, if (dsa_is_cpu_port(ds, port)) return 0; - return gswip_vlan_remove(priv, bridge, port, vlan->vid, pvid, true); + return gswip_vlan_remove(priv, bridge, port, vlan->vid); } static void gswip_port_fast_age(struct dsa_switch *ds, int port) From a57627626636c20399f0c45ea1d16e6283affc91 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:33:41 +0100 Subject: [PATCH 118/867] net: dsa: lantiq_gswip: put a more descriptive error print in gswip_vlan_remove() Improve the error message printed in case of a port VLAN entry not being found upon removal. Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/abd4ec58e0f0f53eb3d7027097a20af0bd7b1d6d.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index d9a7a004f9eb5..cfdeb81485006 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -875,7 +875,8 @@ static int gswip_vlan_remove(struct gswip_priv *priv, } if (idx == -1) { - dev_err(priv->dev, "bridge to leave does not exists\n"); + dev_err(priv->dev, "Port %d cannot find VID %u of bridge %s\n", + port, vid, bridge ? 
bridge->name : "(null)"); return -ENOENT; } From 3bb500caf656b918bddd7e32dba7ed0e5c1c9598 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:33:50 +0100 Subject: [PATCH 119/867] net: dsa: lantiq_gswip: drop untagged on VLAN-aware bridge ports with no PVID Implement the required functionality, as written in Documentation/networking/switchdev.rst section "Bridge VLAN filtering", by using the "VLAN Ingress Tag Rule" feature of the switch. The bit field definitions for this were found while browsing the Intel dual BSD/GPLv2 licensed drivers for this switch IP. Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/787aa807d00b726d75db2a40add215c8b8ba7466.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 6 ++++++ drivers/net/dsa/lantiq/lantiq_gswip.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index cfdeb81485006..1ff0932dae318 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -551,6 +551,7 @@ static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) { struct dsa_port *dp = dsa_to_port(priv->ds, port); struct net_device *br = dsa_port_bridge_dev_get(dp); + u32 vinr; int idx; if (!dsa_port_is_user(dp)) @@ -582,6 +583,11 @@ static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) idx = port + 1; } + vinr = idx ? GSWIP_PCE_VCTRL_VINR_ALL : GSWIP_PCE_VCTRL_VINR_TAGGED; + gswip_switch_mask(priv, GSWIP_PCE_VCTRL_VINR, + FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr), + GSWIP_PCE_VCTRL(port)); + /* GSWIP 2.2 (GRX300) and later program here the VID directly. 
*/ gswip_switch_w(priv, idx, GSWIP_PCE_DEFPVID(port)); } diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 4590a1a7dbd9b..69c8d2deff2d4 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -159,6 +159,10 @@ #define GSWIP_PCE_PCTRL_0_PSTATE_MASK GENMASK(2, 0) #define GSWIP_PCE_VCTRL(p) (0x485 + ((p) * 0xA)) #define GSWIP_PCE_VCTRL_UVR BIT(0) /* Unknown VLAN Rule */ +#define GSWIP_PCE_VCTRL_VINR GENMASK(2, 1) /* VLAN Ingress Tag Rule */ +#define GSWIP_PCE_VCTRL_VINR_ALL 0 /* Admit tagged and untagged packets */ +#define GSWIP_PCE_VCTRL_VINR_TAGGED 1 /* Admit only tagged packets */ +#define GSWIP_PCE_VCTRL_VINR_UNTAGGED 2 /* Admit only untagged packets */ #define GSWIP_PCE_VCTRL_VIMR BIT(3) /* VLAN Ingress Member violation rule */ #define GSWIP_PCE_VCTRL_VEMR BIT(4) /* VLAN Egress Member violation rule */ #define GSWIP_PCE_VCTRL_VSR BIT(5) /* VLAN Security */ From 1f89ed0ebf2696d1d8fa7625e26c692aa153774a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Oct 2025 23:34:01 +0100 Subject: [PATCH 120/867] net: dsa: lantiq_gswip: treat VID 0 like the PVID Documentation/networking/switchdev.rst says that VLAN-aware bridges must treat packets tagged with VID 0 the same as untagged. It appears from the documentation that setting the GSWIP_PCE_VCTRL_VID0 flag (which this driver already had defined) might achieve this. 
Signed-off-by: Vladimir Oltean Signed-off-by: Daniel Golle Link: https://patch.msgid.link/b220ac149922839a261b754202c05df5bb253c98.1760566491.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 1ff0932dae318..25f6b46957a01 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -603,7 +603,7 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, gswip_switch_mask(priv, GSWIP_PCE_VCTRL_VSR, GSWIP_PCE_VCTRL_UVR | GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR, + GSWIP_PCE_VCTRL_VEMR | GSWIP_PCE_VCTRL_VID0, GSWIP_PCE_VCTRL(port)); gswip_switch_mask(priv, GSWIP_PCE_PCTRL_0_TVM, 0, GSWIP_PCE_PCTRL_0p(port)); @@ -611,7 +611,7 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, /* Use port based VLAN */ gswip_switch_mask(priv, GSWIP_PCE_VCTRL_UVR | GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR, + GSWIP_PCE_VCTRL_VEMR | GSWIP_PCE_VCTRL_VID0, GSWIP_PCE_VCTRL_VSR, GSWIP_PCE_VCTRL(port)); gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_TVM, From 38e3a9408496540f3a1dbbfc2ea7e495e14e03d7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 3 Oct 2025 10:29:19 +0200 Subject: [PATCH 121/867] wifi: ath12k: Add MODULE_FIRMWARE() entries Some systems such as live-image or installer require the firmware information for each module declared by MODULE_FIRMWARE(), which is currently missing in ath12k driver. For addressing it, this patch adds the MODULE_FIRMWARE() entries. Like ath11k driver, we can just put the currently used firmware entries for QCN9274 and WCN7850 with wildcards. 
Signed-off-by: Takashi Iwai Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251003082955.11436-1-tiwai@suse.de Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index c729d5526c753..48161db6af579 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1871,3 +1871,7 @@ void ath12k_pci_exit(void) { pci_unregister_driver(&ath12k_pci_driver); } + +/* firmware files */ +MODULE_FIRMWARE(ATH12K_FW_DIR "/QCN9274/hw2.0/*"); +MODULE_FIRMWARE(ATH12K_FW_DIR "/WCN7850/hw2.0/*"); From 88de89f184661ebb946804a5abdf2bdec7f0a7ab Mon Sep 17 00:00:00 2001 From: YanLong Dai Date: Wed, 24 Sep 2025 14:14:44 +0800 Subject: [PATCH 122/867] RDMA/bnxt_re: Fix a potential memory leak in destroy_gsi_sqp The current error handling path in bnxt_re_destroy_gsi_sqp() could lead to a resource leak. When bnxt_qplib_destroy_qp() fails, the function jumps to the 'fail' label and returns immediately, skipping the call to bnxt_qplib_free_qp_res(). Continue the resource teardown even if bnxt_qplib_destroy_qp() fails, which aligns with the driver's general error handling strategy and prevents the potential leak. 
Fixes: 8dae419f9ec73 ("RDMA/bnxt_re: Refactor queue pair creation code") Signed-off-by: YanLong Dai Link: https://patch.msgid.link/20250924061444.11288-1-daiyanlong@kylinos.cn Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4dab5ca7362b5..84ce3fce2826b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -913,7 +913,7 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, spin_unlock_irqrestore(&qp->scq->cq_lock, flags); } -static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) +static void bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) { struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; @@ -933,10 +933,9 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp); - if (rc) { + if (rc) ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed"); - goto fail; - } + bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp); /* remove from active qp list */ @@ -951,10 +950,6 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) rdev->gsi_ctx.gsi_sqp = NULL; rdev->gsi_ctx.gsi_sah = NULL; rdev->gsi_ctx.sqp_tbl = NULL; - - return 0; -fail: - return rc; } static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev) From 8d158f47f1f33d8747e80c3afbea5aa337e59d41 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 23 Sep 2025 19:08:50 +0000 Subject: [PATCH 123/867] RDMA/irdma: Fix SD index calculation In some cases, it is possible for pble_rsrc->next_fpm_addr to be larger than u32, so remove the u32 cast to avoid unintentional truncation. 
This fixes the following error that can be observed when registering massive memory regions: [ 447.227494] (NULL ib_device): cqp opcode = 0x1f maj_err_code = 0xffff min_err_code = 0x800c [ 447.227505] (NULL ib_device): [Update PE SDs Cmd Error][op_code=21] status=-5 waiting=1 completion_err=1 maj=0xffff min=0x800c Fixes: e8c4dbc2fcac ("RDMA/irdma: Add PBLE resource manager") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20250923190850.1022773-1-jmoroni@google.com Acked-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/pble.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index 3091f9345f124..fa6325adaedec 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -71,7 +71,7 @@ int irdma_hmc_init_pble(struct irdma_sc_dev *dev, static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) { - idx->sd_idx = (u32)pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; + idx->sd_idx = pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE); idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD); } From 5575b7646b94c0afb0f4c0d86e00e13cf3397a62 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 23 Sep 2025 14:24:39 +0000 Subject: [PATCH 124/867] RDMA/irdma: Set irdma_cq cq_num field during CQ create The driver maintains a CQ table that is used to ensure that a CQ is still valid when processing CQ related AEs. When a CQ is destroyed, the table entry is cleared, using irdma_cq.cq_num as the index. This field was never being set, so it was just always clearing out entry 0. Additionally, the cq_num field size was increased to accommodate HW supporting more than 64K CQs. 
Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20250923142439.943930-1-jmoroni@google.com Acked-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 1 + drivers/infiniband/hw/irdma/verbs.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 76ce6137f2ba3..c883c9ea5a831 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2503,6 +2503,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); + iwcq->cq_num = cq_num; info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index ed21c1b56e8ec..ac8b387018350 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -140,7 +140,7 @@ struct irdma_srq { struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; - u16 cq_num; + u32 cq_num; bool user_mode; atomic_t armed; enum irdma_cmpl_notify last_notify; From d8713158faad0fd4418cb2f4e432c3876ad53a1f Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Fri, 10 Oct 2025 10:55:17 +0800 Subject: [PATCH 125/867] RDMA/uverbs: Fix umem release in UVERBS_METHOD_CQ_CREATE In `UVERBS_METHOD_CQ_CREATE`, umem should be released if anything goes wrong. Currently, if `create_cq_umem` fails, umem would not be released or referenced, causing a possible leak. In this patch, we release umem at `UVERBS_METHOD_CQ_CREATE`, the driver should not release umem if it returns an error code. 
Fixes: 1a40c362ae26 ("RDMA/uverbs: Add a common way to create CQ with umem") Signed-off-by: Shuhao Fu Link: https://patch.msgid.link/aOh1le4YqtYwj-hH@osx.local Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_cq.c | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 37cd375565104..fab5d914029dd 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -206,6 +206,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return ret; err_free: + ib_umem_release(umem); rdma_restrack_put(&cq->res); kfree(cq); err_event_file: diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index d9a12681f8434..22d3e25c3b9d1 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1216,13 +1216,13 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (umem->length < cq->size) { ibdev_dbg(&dev->ibdev, "External memory too small\n"); err = -EINVAL; - goto err_free_mem; + goto err_out; } if (!ib_umem_is_contiguous(umem)) { ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n"); err = -EINVAL; - goto err_free_mem; + goto err_out; } cq->cpu_addr = NULL; @@ -1251,7 +1251,7 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, err = efa_com_create_cq(&dev->edev, &params, &result); if (err) - goto err_free_mem; + goto err_free_mapped; resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; @@ -1299,12 +1299,10 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); -err_free_mem: - if (umem) - ib_umem_release(umem); - else - efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, 
DMA_FROM_DEVICE); - +err_free_mapped: + if (!umem) + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); err_out: atomic64_inc(&dev->stats.create_cq_err); return err; From 3dfdc98d1dc29d6339fd0551348df0d968fd9ad4 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 17 Oct 2025 12:35:20 -0700 Subject: [PATCH 126/867] net: phy: micrel: simplify return in ksz9477_phy_errata() ksz9477_phy_errata function currently assigns the return value of genphy_restart_aneg() to a variable and then immediately returns it err = genphy_restart_aneg(phydev); if (err) return err; return err; This can be simplified by directly returning the function call result, as the intermediate variable and conditional are redundant. Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251017193525.1457064-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 79ce3eb6752b6..65994d97c403c 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2095,11 +2095,7 @@ static int ksz9477_phy_errata(struct phy_device *phydev) return err; } - err = genphy_restart_aneg(phydev); - if (err) - return err; - - return err; + return genphy_restart_aneg(phydev); } static int ksz9477_config_init(struct phy_device *phydev) From ba397fde5e998478e4962b009321c184b0060067 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 17 Oct 2025 12:35:21 -0700 Subject: [PATCH 127/867] net: phy: micrel: fix typos in comments Fix several spelling and grammatical errors in comments across micrel PHY drivers. 
Corrections include: - "dealy" -> "delay" - "autonegotation" -> "autonegotiation" - "recheas" -> "reaches" - "one" -> "on" - "improvenent" -> "improvement" - "intput" -> "input" Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251017193525.1457064-2-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 65994d97c403c..5f2c7e5c314f5 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1050,7 +1050,7 @@ static int ksz9021_config_init(struct phy_device *phydev) #define TX_CLK_ID 0x1f /* set tx and tx_clk to "No delay adjustment" to keep 0ns - * dealy + * delay */ #define TX_ND 0x7 #define TX_CLK_ND 0xf @@ -1913,7 +1913,7 @@ static int ksz886x_config_aneg(struct phy_device *phydev) return ret; if (phydev->autoneg != AUTONEG_ENABLE) { - /* When autonegotation is disabled, we need to manually force + /* When autonegotiation is disabled, we need to manually force * the link state. If we don't do this, the PHY will keep * sending Fast Link Pulses (FLPs) which are part of the * autonegotiation process. 
This is not desired when @@ -3533,7 +3533,7 @@ static void lan8814_ptp_disable_event(struct phy_device *phydev, int event) /* Set target to too far in the future, effectively disabling it */ lan8814_ptp_set_target(phydev, event, 0xFFFFFFFF, 0); - /* And then reload once it recheas the target */ + /* And then reload once it reaches the target */ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, LAN8814_PTP_GENERAL_CONFIG, LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event), LAN8814_PTP_GENERAL_CONFIG_RELOAD_ADD_X(event)); @@ -4403,7 +4403,7 @@ static int lan8814_release_coma_mode(struct phy_device *phydev) static void lan8814_clear_2psp_bit(struct phy_device *phydev) { /* It was noticed that when traffic is passing through the PHY and the - * cable is removed then the LED was still one even though there is no + * cable is removed then the LED was still on even though there is no * link */ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PCS_DIGITAL, LAN8814_EEE_STATE, @@ -4543,7 +4543,7 @@ static int lan8841_config_init(struct phy_device *phydev) phy_write_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, LAN8841_PTP_TX_VERSION, 0xff00); - /* 100BT Clause 40 improvenent errata */ + /* 100BT Clause 40 improvement errata */ phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, LAN8841_ANALOG_CONTROL_1, LAN8841_ANALOG_CONTROL_1_PLL_TRIM(0x2)); @@ -5563,7 +5563,7 @@ static int lan8841_ptp_extts_on(struct kszphy_ptp_priv *ptp_priv, int pin, u16 tmp = 0; int ret; - /* Set GPIO to be intput */ + /* Set GPIO to be input */ ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_EN, BIT(pin)); if (ret) return ret; From f8a55d5e71e6bdf4793080e622b92cb2646171b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Oct 2025 14:53:34 +0000 Subject: [PATCH 128/867] net: add a fast path in __netif_schedule() Cpus serving NIC interrupts and specifically TX completions are often trapped in also restarting a busy qdisc (because qdisc was stopped by BQL or the driver's own flow control). 
When they call netdev_tx_completed_queue() or netif_tx_wake_queue(), they call __netif_schedule() so that the queue can be run later from net_tx_action() (involving NET_TX_SOFTIRQ). Quite often, by the time the cpu reaches net_tx_action(), another cpu grabbed the qdisc spinlock from __dev_xmit_skb(), and we spend too much time spinning on this lock. We can detect in __netif_schedule() if a cpu is already at a specific point in __dev_xmit_skb() where we have the guarantee the queue will be run. This patch gives a 13 % increase of throughput on an IDPF NIC (200Gbit), 32 TX queues, sending UDP packets of 120 bytes. This also helps __qdisc_run() to not force a NET_TX_SOFTIRQ if another thread is waiting in __dev_xmit_skb(). Before: sar -n DEV 5 5|grep eth1|grep Average Average: eth1 1496.44 52191462.56 210.00 13369396.90 0.00 0.00 0.00 54.76 After: sar -n DEV 5 5|grep eth1|grep Average Average: eth1 1457.88 59363099.96 205.08 15206384.35 0.00 0.00 0.00 62.29 Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251017145334.3016097-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 821e7c7189244..9482b905c66a5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3373,6 +3373,13 @@ static void __netif_reschedule(struct Qdisc *q) void __netif_schedule(struct Qdisc *q) { + /* If q->defer_list is not empty, at least one thread is + * in __dev_xmit_skb() before llist_del_all(&q->defer_list). + * This thread will attempt to run the queue. 
+ */ + if (!llist_empty(&q->defer_list)) + return; + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) __netif_reschedule(q); } From 370157293175a702036203faec3e0495b081f135 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Oct 2025 20:59:17 -0700 Subject: [PATCH 129/867] nl802154: fix some kernel-doc warnings Correct multiple kernel-doc warnings in nl802154.h: - Fix a typo on one enum name to avoid a kernel-doc warning. - Drop 2 enum descriptions that are no longer needed. - Mark 2 internal enums as "private:" so that kernel-doc is not needed for them. Warning: nl802154.h:239 Enum value 'NL802154_CAP_ATTR_MAX_MAXBE' not described in enum 'nl802154_wpan_phy_capability_attr' Warning: nl802154.h:239 Excess enum value '%NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL' description in 'nl802154_wpan_phy_capability_attr' Warning: nl802154.h:239 Excess enum value '%NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL' description in 'nl802154_wpan_phy_capability_attr' Warning: nl802154.h:369 Enum value '__NL802154_CCA_OPT_ATTR_AFTER_LAST' not described in enum 'nl802154_cca_opts' Warning: nl802154.h:369 Enum value 'NL802154_CCA_OPT_ATTR_MAX' not described in enum 'nl802154_cca_opts' Signed-off-by: Randy Dunlap Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251016035917.1148012-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/net/nl802154.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/nl802154.h b/include/net/nl802154.h index a994dea745966..442822746e924 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -191,14 +191,12 @@ enum nl802154_iftype { * @NL802154_CAP_ATTR_CHANNELS: a nested attribute for nl802154_channel_attr * @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for * nl802154_wpan_phy_tx_power - * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level - * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maximum value for cca_ed_level * @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags * 
@NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags * @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value * @NL802154_CAP_ATTR_MAX_MINBE: maximum of minbe value * @NL802154_CAP_ATTR_MIN_MAXBE: minimum of maxbe value - * @NL802154_CAP_ATTR_MAX_MINBE: maximum of maxbe value + * @NL802154_CAP_ATTR_MAX_MAXBE: maximum of maxbe value * @NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS: minimum of csma backoff value * @NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS: maximum of csma backoffs value * @NL802154_CAP_ATTR_MIN_FRAME_RETRIES: minimum of frame retries value @@ -364,6 +362,7 @@ enum nl802154_cca_opts { NL802154_CCA_OPT_ENERGY_CARRIER_AND, NL802154_CCA_OPT_ENERGY_CARRIER_OR, + /* private: */ /* keep last */ __NL802154_CCA_OPT_ATTR_AFTER_LAST, NL802154_CCA_OPT_ATTR_MAX = __NL802154_CCA_OPT_ATTR_AFTER_LAST - 1 From 813882ae22756bcf9645d405e045c60e5aab0a93 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:36:46 +0100 Subject: [PATCH 130/867] net: stmmac: remove broken PCS code Changing the netif_carrier_*() state behind phylink's back has always been prohibited because it messes up with phylinks state tracking, and means that phylink no longer guarantees to call the mac_link_down() and mac_link_up() methods at the appropriate times. This was later documented in the sfp-phylink network driver conversion guide. stmmac was converted to phylink in 2019, but nothing was done with the "PCS" code. Since then, apart from the updates as part of phylink development, nothing has happened with stmmac to improve its use of phylink, or even to address this point. A couple of years ago, a has_integrated_pcs boolean was added by Bart, which later became the STMMAC_FLAG_HAS_INTEGRATED_PCS flag, to avoid manipulating the netif_carrier_*() state. This flag is mis-named, because whenever the stmmac is synthesized for its native SGMII, TBI or RTBI interfaces, it has an "integrated PCS". This boolean/flag actually means "ignore the status from the integrated PCS". 
Discussing with Bart, the reasons for this are lost to the winds of time (which is why we should always document the reasons in the commit message.) RGMII also has in-band status, and the dwmac cores and stmmac code supports this but with one bug that saves the day. When dwmac cores are synthesised for RGMII only, they do not contain an integrated PCS, and so priv->dma_cap.pcs is clear, which prevents (incorrectly) the "RGMII PCS" being used, meaning we don't read the in-band status. However, a core synthesised for RGMII and also SGMII, TBI or RTBI will have this capability bit set, thus making these code paths reachable. The Jetson Xavier NX uses RGMII mode to talk to its PHY, and removing the incorrect check for priv->dma_cap.pcs reveals the theoretical issue with netif_carrier_*() manipulation is real: dwc-eth-dwmac 2490000.ethernet eth0: Register MEM_TYPE_PAGE_POOL RxQ-0 dwc-eth-dwmac 2490000.ethernet eth0: PHY [stmmac-0:00] driver [RTL8211F Gigabit Ethernet] (irq=141) dwc-eth-dwmac 2490000.ethernet eth0: No Safety Features support found dwc-eth-dwmac 2490000.ethernet eth0: IEEE 1588-2008 Advanced Timestamp supported dwc-eth-dwmac 2490000.ethernet eth0: registered PTP clock dwc-eth-dwmac 2490000.ethernet eth0: configuring for phy/rgmii-id link mode 8021q: adding VLAN 0 to HW filter on device eth0 dwc-eth-dwmac 2490000.ethernet eth0: Adding VLAN ID 0 is not supported Link is Up - 1000/Full Link is Down Link is Up - 1000/Full This looks good until one realises that the phylink "Link" status messages are missing, even when the RJ45 cable is reconnected. Nothing one can do results in the interface working. The interrupt handler (which prints those "Link is" messages) always wins over phylink's resolve worker, meaning phylink never calls the mac_link_up() nor mac_link_down() methods. 
eth0 also sees no traffic received, and is unable to obtain a DHCP address: 3: eth0: mtu 1500 qdisc mq state UP group default qlen 1000 link/ether e6:d3:6a:e6:92:de brd ff:ff:ff:ff:ff:ff RX: bytes packets errors dropped overrun mcast 0 0 0 0 0 0 TX: bytes packets errors dropped carrier collsns 27686 149 0 0 0 0 With the STMMAC_FLAG_HAS_INTEGRATED_PCS flag set, which disables the netif_carrier_*() manipulation then stmmac works normally: dwc-eth-dwmac 2490000.ethernet eth0: Register MEM_TYPE_PAGE_POOL RxQ-0 dwc-eth-dwmac 2490000.ethernet eth0: PHY [stmmac-0:00] driver [RTL8211F Gigabit Ethernet] (irq=141) dwc-eth-dwmac 2490000.ethernet eth0: No Safety Features support found dwc-eth-dwmac 2490000.ethernet eth0: IEEE 1588-2008 Advanced Timestamp supported dwc-eth-dwmac 2490000.ethernet eth0: registered PTP clock dwc-eth-dwmac 2490000.ethernet eth0: configuring for phy/rgmii-id link mode 8021q: adding VLAN 0 to HW filter on device eth0 dwc-eth-dwmac 2490000.ethernet eth0: Adding VLAN ID 0 is not supported Link is Up - 1000/Full dwc-eth-dwmac 2490000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx and packets can be transferred. This clearly shows that when priv->hw->pcs is set, but STMMAC_FLAG_HAS_INTEGRATED_PCS is clear, the driver reliably fails. Discovering whether a platform falls into this is impossible as parsing all the dtsi and dts files to find out which use the stmmac driver, whether any of them use RGMII or SGMII and also depends whether an external interface is being used. The kernel likely doesn't contain all dts files either. The only driver that sets this flag uses the qcom,sa8775p-ethqos compatible, and uses SGMII or 2500BASE-X, but these are saved from this problem by the incorrect check for priv->dma_cap.pcs. So, we have to assume that every other platform that uses SGMII with stmmac is using an external PCS. Moreover, ethtool output can be incorrect. 
With the full-duplex link negotiated, ethtool reports: Speed: 1000Mb/s Duplex: Half because with dwmac4, the full-duplex bit is in bit 16 of the status, priv->xstats.pcs_duplex becomes BIT(16) for full duplex, but the ethtool ksettings duplex member is u8 - so becomes zero. Moreover, the supported, advertised and link partner modes are all "not reported". Finally, ksettings_set() won't be able to set the advertisement on a PHY if this PCS code is activated, which is incorrect when SGMII is used with a PHY. Thus, remove: 1. the incorrect netif_carrier_*() manipulation. 2. the broken ethtool ksettings code. Given that all uses of STMMAC_FLAG_HAS_INTEGRATED_PCS are now gone, remove the flag from stmmac.h and dwmac-qcom-ethqos.c. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P5y-0000000AolC-1QWH@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 4 -- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 55 ------------------- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 9 --- include/linux/stmmac.h | 1 - 4 files changed, 69 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index d8fd4d8f6ced7..f62825220cf70 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -96,7 +96,6 @@ struct ethqos_emac_driver_data { bool rgmii_config_loopback_en; bool has_emac_ge_3; const char *link_clk_name; - bool has_integrated_pcs; u32 dma_addr_width; struct dwmac4_addrs dwmac4_addrs; bool needs_sgmii_loopback; @@ -282,7 +281,6 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .rgmii_config_loopback_en = false, .has_emac_ge_3 = true, .link_clk_name = "phyaux", - .has_integrated_pcs = true, .needs_sgmii_loopback = true, .dma_addr_width = 36, .dwmac4_addrs = { @@ 
-856,8 +854,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_TSO_EN; if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; - if (data->has_integrated_pcs) - plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS; if (data->dma_addr_width) plat_dat->host_dma_width = data->dma_addr_width; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 39fa1ec92f82f..d89662b48087e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -322,47 +322,6 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (!(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS) && - (priv->hw->pcs & STMMAC_PCS_RGMII || - priv->hw->pcs & STMMAC_PCS_SGMII)) { - u32 supported, advertising, lp_advertising; - - if (!priv->xstats.pcs_link) { - cmd->base.speed = SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; - return 0; - } - cmd->base.duplex = priv->xstats.pcs_duplex; - - cmd->base.speed = priv->xstats.pcs_speed; - - /* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */ - - ethtool_convert_link_mode_to_legacy_u32( - &supported, cmd->link_modes.supported); - ethtool_convert_link_mode_to_legacy_u32( - &advertising, cmd->link_modes.advertising); - ethtool_convert_link_mode_to_legacy_u32( - &lp_advertising, cmd->link_modes.lp_advertising); - - /* Reg49[3] always set because ANE is always supported */ - cmd->base.autoneg = ADVERTISED_Autoneg; - supported |= SUPPORTED_Autoneg; - advertising |= ADVERTISED_Autoneg; - lp_advertising |= ADVERTISED_Autoneg; - - cmd->base.port = PORT_OTHER; - - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.supported, supported); - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.advertising, advertising); - ethtool_convert_legacy_u32_to_link_mode( - 
cmd->link_modes.lp_advertising, lp_advertising); - - return 0; - } - return phylink_ethtool_ksettings_get(priv->phylink, cmd); } @@ -372,20 +331,6 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (!(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS) && - (priv->hw->pcs & STMMAC_PCS_RGMII || - priv->hw->pcs & STMMAC_PCS_SGMII)) { - /* Only support ANE */ - if (cmd->base.autoneg != AUTONEG_ENABLE) - return -EINVAL; - - mutex_lock(&priv->lock); - stmmac_pcs_ctrl_ane(priv, 1, priv->hw->ps, 0); - mutex_unlock(&priv->lock); - - return 0; - } - return phylink_ethtool_ksettings_set(priv->phylink, cmd); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c9fa965c85660..867d0ca3b45e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6001,15 +6001,6 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv) for (queue = 0; queue < queues_count; queue++) stmmac_host_mtl_irq_status(priv, priv->hw, queue); - /* PCS link status */ - if (priv->hw->pcs && - !(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS)) { - if (priv->xstats.pcs_link) - netif_carrier_on(priv->dev); - else - netif_carrier_off(priv->dev); - } - stmmac_timestamp_interrupt(priv, priv); } } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fa1318bac06c4..99022620457ac 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -171,7 +171,6 @@ struct dwmac4_addrs { u32 mtl_low_cred_offset; }; -#define STMMAC_FLAG_HAS_INTEGRATED_PCS BIT(0) #define STMMAC_FLAG_SPH_DISABLE BIT(1) #define STMMAC_FLAG_USE_PHY_WOL BIT(2) #define STMMAC_FLAG_HAS_SUN8I BIT(3) From 14f74bc6dc699f63b5a6dfa9b22229f0caea89f3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:36:51 +0100 Subject: [PATCH 131/867] net: stmmac: remove xstats.pcs_* members As a result of the 
previous commit, the pcs_link, pcs_duplex and pcs_speed members are not used outside of the interrupt handling code, and are only used to print their status using the misleading "Link is" messages that bear no relation to the actual status of the link. Remove the printing of these messages, these members, and the code that decodes them from the hardware. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P63-0000000AolI-23Kf@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 -- .../ethernet/stmicro/stmmac/dwmac1000_core.c | 28 +------------------ .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 28 +------------------ 3 files changed, 2 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 8f34c9ad457f0..33aeac5666f4b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -192,9 +192,6 @@ struct stmmac_extra_stats { unsigned long irq_pcs_ane_n; unsigned long irq_pcs_link_n; unsigned long irq_rgmii_n; - unsigned long pcs_link; - unsigned long pcs_duplex; - unsigned long pcs_speed; /* debug register */ unsigned long mtl_tx_status_fifo_full; unsigned long mtl_tx_fifo_not_empty; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index fe776ddf68895..2c5ee59c32086 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -266,34 +266,8 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode) /* RGMII or SMII interface */ static void dwmac1000_rgsmii(void __iomem *ioaddr, struct stmmac_extra_stats *x) { - u32 status; - - status = readl(ioaddr + GMAC_RGSMIIIS); + readl(ioaddr + GMAC_RGSMIIIS); x->irq_rgmii_n++; - - /* 
Check the link status */ - if (status & GMAC_RGSMIIIS_LNKSTS) { - int speed_value; - - x->pcs_link = 1; - - speed_value = ((status & GMAC_RGSMIIIS_SPEED) >> - GMAC_RGSMIIIS_SPEED_SHIFT); - if (speed_value == GMAC_RGSMIIIS_SPEED_125) - x->pcs_speed = SPEED_1000; - else if (speed_value == GMAC_RGSMIIIS_SPEED_25) - x->pcs_speed = SPEED_100; - else - x->pcs_speed = SPEED_10; - - x->pcs_duplex = (status & GMAC_RGSMIIIS_LNKMOD_MASK); - - pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, - x->pcs_duplex ? "Full" : "Half"); - } else { - x->pcs_link = 0; - pr_info("Link is Down\n"); - } } static int dwmac1000_irq_status(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index d85bc0bb5c3c0..8a19df7b05775 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -592,34 +592,8 @@ static void dwmac4_ctrl_ane(struct stmmac_priv *priv, bool ane, bool srgmi_ral, /* RGMII or SMII interface */ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) { - u32 status; - - status = readl(ioaddr + GMAC_PHYIF_CONTROL_STATUS); + readl(ioaddr + GMAC_PHYIF_CONTROL_STATUS); x->irq_rgmii_n++; - - /* Check the link status */ - if (status & GMAC_PHYIF_CTRLSTATUS_LNKSTS) { - int speed_value; - - x->pcs_link = 1; - - speed_value = ((status & GMAC_PHYIF_CTRLSTATUS_SPEED) >> - GMAC_PHYIF_CTRLSTATUS_SPEED_SHIFT); - if (speed_value == GMAC_PHYIF_CTRLSTATUS_SPEED_125) - x->pcs_speed = SPEED_1000; - else if (speed_value == GMAC_PHYIF_CTRLSTATUS_SPEED_25) - x->pcs_speed = SPEED_100; - else - x->pcs_speed = SPEED_10; - - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); - - pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, - x->pcs_duplex ? 
"Full" : "Half"); - } else { - x->pcs_link = 0; - pr_info("Link is Down\n"); - } } static int dwmac4_irq_mtl_status(struct stmmac_priv *priv, From 2e2c878a314175a4c3b76ee3be455f543438f472 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:36:56 +0100 Subject: [PATCH 132/867] net: stmmac: remove SGMII/RGMII/SMII interrupt handling Now that the only use for the interrupt is to clear it and increment a statistic counter (which is not that relevant anymore) remove all this code and ensure that the interrupt remains disabled to avoid a stuck interrupt. dwmac-sun8i still uses this statistic counter, so it is inappropriate for this patch to remove it. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P68-0000000AolO-2W5s@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 6 +++--- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 10 ---------- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 3 +-- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 9 --------- 4 files changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 0c011a47d5a3e..8f3002d9de78b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -38,10 +38,10 @@ #define GMAC_INT_DISABLE_PCSAN BIT(2) #define GMAC_INT_DISABLE_PMT BIT(3) #define GMAC_INT_DISABLE_TIMESTAMP BIT(9) -#define GMAC_INT_DISABLE_PCS (GMAC_INT_DISABLE_RGMII | \ - GMAC_INT_DISABLE_PCSLINK | \ +#define GMAC_INT_DISABLE_PCS (GMAC_INT_DISABLE_PCSLINK | \ GMAC_INT_DISABLE_PCSAN) -#define GMAC_INT_DEFAULT_MASK (GMAC_INT_DISABLE_TIMESTAMP | \ +#define GMAC_INT_DEFAULT_MASK (GMAC_INT_DISABLE_RGMII | \ + GMAC_INT_DISABLE_TIMESTAMP | \ GMAC_INT_DISABLE_PCS) /* PMT Control and Status */ diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 2c5ee59c32086..654331b411f44 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -263,13 +263,6 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode) writel(pmt, ioaddr + GMAC_PMT); } -/* RGMII or SMII interface */ -static void dwmac1000_rgsmii(void __iomem *ioaddr, struct stmmac_extra_stats *x) -{ - readl(ioaddr + GMAC_RGSMIIIS); - x->irq_rgmii_n++; -} - static int dwmac1000_irq_status(struct mac_device_info *hw, struct stmmac_extra_stats *x) { @@ -311,9 +304,6 @@ static int dwmac1000_irq_status(struct mac_device_info *hw, dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x); - if (intr_status & PCS_RGSMIIIS_IRQ) - dwmac1000_rgsmii(ioaddr, x); - return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 3dec1a264cf60..6dd84b6544cc0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -106,8 +106,7 @@ #define GMAC_INT_LPI_EN BIT(5) #define GMAC_INT_TSIE BIT(12) -#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ - GMAC_INT_PCS_ANE) +#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_PCS_LINK | GMAC_INT_PCS_ANE) #define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \ GMAC_INT_TSIE) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 8a19df7b05775..bff4c371c1d26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -589,13 +589,6 @@ static void dwmac4_ctrl_ane(struct stmmac_priv *priv, bool ane, bool srgmi_ral, dwmac_ctrl_ane(priv->ioaddr, GMAC_PCS_BASE, ane, srgmi_ral, loopback); } -/* RGMII or SMII interface */ -static void dwmac4_phystatus(void __iomem *ioaddr, struct 
stmmac_extra_stats *x) -{ - readl(ioaddr + GMAC_PHYIF_CONTROL_STATUS); - x->irq_rgmii_n++; -} - static int dwmac4_irq_mtl_status(struct stmmac_priv *priv, struct mac_device_info *hw, u32 chan) { @@ -667,8 +660,6 @@ static int dwmac4_irq_status(struct mac_device_info *hw, } dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x); - if (intr_status & PCS_RGSMIIIS_IRQ) - dwmac4_phystatus(ioaddr, x); return ret; } From ebc5d656b78cbcc6314f3af12af2d556f795d280 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:01 +0100 Subject: [PATCH 133/867] net: stmmac: remove PCS "mode" pause handling Remove the "we always autoneg pause" forcing when the stmmac driver decides that a "PCS" is present, which blocks passing the ethtool pause calls to phylink when using SGMII mode. This prevents the pause results being reported when a PHY is attached using SGMII mode, or the pause settings being changed in SGMII mode. There is no reason to prevent this. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6D-0000000AolU-2zjv@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d89662b48087e..c60cd948311ea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -424,11 +424,7 @@ stmmac_get_pauseparam(struct net_device *netdev, { struct stmmac_priv *priv = netdev_priv(netdev); - if (priv->hw->pcs) { - pause->autoneg = 1; - } else { - phylink_ethtool_get_pauseparam(priv->phylink, pause); - } + phylink_ethtool_get_pauseparam(priv->phylink, pause); } static int @@ -437,12 +433,7 @@ stmmac_set_pauseparam(struct net_device *netdev, { struct stmmac_priv 
*priv = netdev_priv(netdev); - if (priv->hw->pcs) { - pause->autoneg = 1; - return 0; - } else { - return phylink_ethtool_set_pauseparam(priv->phylink, pause); - } + return phylink_ethtool_set_pauseparam(priv->phylink, pause); } static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q) From 19064a58bd3c38dd1f93eb6c687a5b268cea03ba Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:06 +0100 Subject: [PATCH 134/867] net: stmmac: remove unused PCS loopback support Nothing calls stmmac_pcs_ctrl_ane() with the "loopback" argument set to anything except zero, so this serves no useful purpose. Remove the argument to reduce the code complexity. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6I-0000000Aola-3Sih@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 5 ++--- drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h | 6 +----- 6 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index f62825220cf70..32244217d9526 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -622,7 +622,7 @@ static void ethqos_set_serdes_speed(struct qcom_ethqos *ethqos, int speed) static void ethqos_pcs_set_inband(struct stmmac_priv *priv, bool enable) { - stmmac_pcs_ctrl_ane(priv, enable, 0, 0); + stmmac_pcs_ctrl_ane(priv, enable, 0); } /* On interface toggle MAC registers gets reset. 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 654331b411f44..5c653be3d453f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -358,9 +358,9 @@ static void dwmac1000_set_eee_timer(struct mac_device_info *hw, int ls, int tw) } static void dwmac1000_ctrl_ane(struct stmmac_priv *priv, bool ane, - bool srgmi_ral, bool loopback) + bool srgmi_ral) { - dwmac_ctrl_ane(priv->ioaddr, GMAC_PCS_BASE, ane, srgmi_ral, loopback); + dwmac_ctrl_ane(priv->ioaddr, GMAC_PCS_BASE, ane, srgmi_ral); } static void dwmac1000_debug(struct stmmac_priv *priv, void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index bff4c371c1d26..21e4461db9378 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -583,10 +583,9 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, } } -static void dwmac4_ctrl_ane(struct stmmac_priv *priv, bool ane, bool srgmi_ral, - bool loopback) +static void dwmac4_ctrl_ane(struct stmmac_priv *priv, bool ane, bool srgmi_ral) { - dwmac_ctrl_ane(priv->ioaddr, GMAC_PCS_BASE, ane, srgmi_ral, loopback); + dwmac_ctrl_ane(priv->ioaddr, GMAC_PCS_BASE, ane, srgmi_ral); } static int dwmac4_irq_mtl_status(struct stmmac_priv *priv, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 14dbe0685997d..7796f5f3c96f1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -374,8 +374,8 @@ struct stmmac_ops { struct stmmac_extra_stats *x, u32 rx_queues, u32 tx_queues); /* PCS calls */ - void (*pcs_ctrl_ane)(struct stmmac_priv *priv, bool ane, bool srgmi_ral, - bool loopback); + void (*pcs_ctrl_ane)(struct stmmac_priv *priv, bool ane, + bool srgmi_ral); /* 
Safety Features */ int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp, struct stmmac_safety_feature_cfg *safety_cfg); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 867d0ca3b45e2..e21d96933408a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3493,7 +3493,7 @@ static int stmmac_hw_setup(struct net_device *dev) } if (priv->hw->pcs) - stmmac_pcs_ctrl_ane(priv, 1, priv->hw->ps, 0); + stmmac_pcs_ctrl_ane(priv, 1, priv->hw->ps); /* set TX and RX rings length */ stmmac_set_rings_length(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h index 4a684c97dfaeb..5778f5b2f3139 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h @@ -82,13 +82,12 @@ static inline void dwmac_pcs_isr(void __iomem *ioaddr, u32 reg, * @reg: Base address of the AN Control Register. * @ane: to enable the auto-negotiation * @srgmi_ral: to manage MAC-2-MAC SGMII connections. - * @loopback: to cause the PHY to loopback tx data into rx path. * Description: this is the main function to configure the AN control register * and init the ANE, select loopback (usually for debugging purpose) and * configure SGMII RAL. 
*/ static inline void dwmac_ctrl_ane(void __iomem *ioaddr, u32 reg, bool ane, - bool srgmi_ral, bool loopback) + bool srgmi_ral) { u32 value = readl(ioaddr + GMAC_AN_CTRL(reg)); @@ -104,9 +103,6 @@ static inline void dwmac_ctrl_ane(void __iomem *ioaddr, u32 reg, bool ane, if (srgmi_ral) value |= GMAC_AN_CTRL_SGMRAL; - if (loopback) - value |= GMAC_AN_CTRL_ELE; - writel(value, ioaddr + GMAC_AN_CTRL(reg)); } #endif /* __STMMAC_PCS_H__ */ From aa1b6775aef74b2177aa72f912564cd064a83b70 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:11 +0100 Subject: [PATCH 135/867] net: stmmac: remove hw->ps xxx_core_init() hardware setup After a lot of digging, it seems that the oddly named hw->ps member is all about configuring the core for reverse SGMII. This member is set to one of 0, SPEED_10, SPEED_100 or SPEED_1000 depending on priv->plat->mac_port_sel_speed. On DT systems, this comes from the "snps,ps-speed" DT property. When set to a non-zero value, it: 1. Configures the MAC at initialisation time to operate at a specific speed. However, this will be overwritten by mac_link_up() when the link comes up (e.g. with the fixed-link parameters.) Note that dwxgmac2 wants to also support SPEED_2500 and SPEED_10000, but both these values are impossible. 2. It _incorrectly_ enables the transmitter (GMAC_CONFIG_TE) which makes no sense, rather than enabling the "transmit configuration" bit (GMAC_CONFIG_TC). Likely a typo. 3. It configures the SGMII rate adapter layer to retrieve its speed setting from the MAC configuration register rather than the PHY. There are two ways forward here: a) fixing (2) so that we set GMAC_CONFIG_TC. However, we have platforms that set the "snps,ps-speed" property and that work today. Fixing this will cause the RGMII, SGMII or SMII inband configuration to be transmitted, which will be a functional change which could cause a regression. b) ripping out (1) and (2) as they are ineffective. 
This also has the possibility of regressions, but the patch author believes this risk is much lower than (a). Therefore, this commit takes the approach in (b). Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6N-0000000Aolg-3y0a@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac1000_core.c | 23 +++-------------- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 24 +++--------------- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 25 ++----------------- 3 files changed, 8 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 5c653be3d453f..d35db8958be15 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -26,35 +26,18 @@ static void dwmac1000_core_init(struct mac_device_info *hw, struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; - u32 value = readl(ioaddr + GMAC_CONTROL); int mtu = dev->mtu; + u32 value; /* Configure GMAC core */ - value |= GMAC_CORE_INIT; + value = readl(ioaddr + GMAC_CONTROL); if (mtu > 1500) value |= GMAC_CONTROL_2K; if (mtu > 2000) value |= GMAC_CONTROL_JE; - if (hw->ps) { - value |= GMAC_CONTROL_TE; - - value &= ~hw->link.speed_mask; - switch (hw->ps) { - case SPEED_1000: - value |= hw->link.speed1000; - break; - case SPEED_100: - value |= hw->link.speed100; - break; - case SPEED_10: - value |= hw->link.speed10; - break; - } - } - - writel(value, ioaddr + GMAC_CONTROL); + writel(value | GMAC_CORE_INIT, ioaddr + GMAC_CONTROL); /* Mask GMAC interrupts */ value = GMAC_INT_DEFAULT_MASK; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 21e4461db9378..d855ab6b91458 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c 
@@ -27,29 +27,11 @@ static void dwmac4_core_init(struct mac_device_info *hw, { struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; - u32 value = readl(ioaddr + GMAC_CONFIG); unsigned long clk_rate; + u32 value; - value |= GMAC_CORE_INIT; - - if (hw->ps) { - value |= GMAC_CONFIG_TE; - - value &= hw->link.speed_mask; - switch (hw->ps) { - case SPEED_1000: - value |= hw->link.speed1000; - break; - case SPEED_100: - value |= hw->link.speed100; - break; - case SPEED_10: - value |= hw->link.speed10; - break; - } - } - - writel(value, ioaddr + GMAC_CONFIG); + value = readl(ioaddr + GMAC_CONFIG); + writel(value | GMAC_CORE_INIT, ioaddr + GMAC_CONFIG); /* Configure LPI 1us counter to number of CSR clock ticks in 1us - 1 */ clk_rate = clk_get_rate(priv->plat->stmmac_clk); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 00e929bf280ba..0430af27da407 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -23,29 +23,8 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, tx = readl(ioaddr + XGMAC_TX_CONFIG); rx = readl(ioaddr + XGMAC_RX_CONFIG); - tx |= XGMAC_CORE_INIT_TX; - rx |= XGMAC_CORE_INIT_RX; - - if (hw->ps) { - tx |= XGMAC_CONFIG_TE; - tx &= ~hw->link.speed_mask; - - switch (hw->ps) { - case SPEED_10000: - tx |= hw->link.xgmii.speed10000; - break; - case SPEED_2500: - tx |= hw->link.speed2500; - break; - case SPEED_1000: - default: - tx |= hw->link.speed1000; - break; - } - } - - writel(tx, ioaddr + XGMAC_TX_CONFIG); - writel(rx, ioaddr + XGMAC_RX_CONFIG); + writel(tx | XGMAC_CORE_INIT_TX, ioaddr + XGMAC_TX_CONFIG); + writel(rx | XGMAC_CORE_INIT_RX, ioaddr + XGMAC_RX_CONFIG); writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); } From 70589b05a03e853367c6f887191a7527bcf08163 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:17 +0100 Subject: [PATCH 136/867] 
net: stmmac: remove RGMII "pcs" mode Remove the RGMII "pcs" code in stmmac_check_pcs_mode() due to: 1) This should never have been conditional on a PCS being present, as when a core is synthesised using only RGMII, the PCS won't be present and priv->dma_cap.pcs will be false. Only multi-interface cores which have a PCS present would have detected RGMII. 2) STMMAC_PCS_RGMII has no effect since the broken netif_carrier and ethtool code was removed. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6T-0000000Aoll-0Ify@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 33aeac5666f4b..ed5e207ffdba8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -270,7 +270,6 @@ struct stmmac_safety_stats { #define FLOW_AUTO (FLOW_TX | FLOW_RX) /* PCS defines */ -#define STMMAC_PCS_RGMII (1 << 0) #define STMMAC_PCS_SGMII (1 << 1) #define SF_DMA_MODE 1 /* DMA STORE-AND-FORWARD Operation Mode */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e21d96933408a..f160c5bd25b2a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1087,17 +1087,9 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) { int interface = priv->plat->phy_interface; - if (priv->dma_cap.pcs) { - if ((interface == PHY_INTERFACE_MODE_RGMII) || - (interface == PHY_INTERFACE_MODE_RGMII_ID) || - (interface == PHY_INTERFACE_MODE_RGMII_RXID) || - (interface == PHY_INTERFACE_MODE_RGMII_TXID)) { - netdev_dbg(priv->dev, "PCS RGMII 
support enabled\n"); - priv->hw->pcs = STMMAC_PCS_RGMII; - } else if (interface == PHY_INTERFACE_MODE_SGMII) { - netdev_dbg(priv->dev, "PCS SGMII support enabled\n"); - priv->hw->pcs = STMMAC_PCS_SGMII; - } + if (priv->dma_cap.pcs && interface == PHY_INTERFACE_MODE_SGMII) { + netdev_dbg(priv->dev, "PCS SGMII support enabled\n"); + priv->hw->pcs = STMMAC_PCS_SGMII; } } From c7b0d7874de0db6ce24bae3530ee91309aa2ff4e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:22 +0100 Subject: [PATCH 137/867] net: stmmac: move reverse-"pcs" mode setup to stmmac_check_pcs_mode() The broken reverse-mode, selected by snps,ps-speed, is configured when the platform provides a valid port speed and a PCS is being used. Both these remain constant after the driver has probed, so the software state doesn't need to be re-initialised each time stmmac_hw_setup() is called (which is called at open and resume time.) Move the software setup of reverse-mode to stmmac_check_pcs_mode() which is called from the driver probe function. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6Y-0000000Aolr-0vLH@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f160c5bd25b2a..9a8dacf18c14c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1091,6 +1091,19 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) netdev_dbg(priv->dev, "PCS SGMII support enabled\n"); priv->hw->pcs = STMMAC_PCS_SGMII; } + + /* PS and related bits will be programmed according to the speed */ + if (priv->hw->pcs) { + int speed = priv->plat->mac_port_sel_speed; + + if ((speed == SPEED_10) || (speed == SPEED_100) || + (speed == SPEED_1000)) { + priv->hw->ps = speed; + } else { + dev_warn(priv->device, "invalid port speed\n"); + priv->hw->ps = 0; + } + } } /** @@ -3435,19 +3448,6 @@ static int stmmac_hw_setup(struct net_device *dev) stmmac_set_umac_addr(priv, priv->hw, dev->dev_addr, 0); phylink_rx_clk_stop_unblock(priv->phylink); - /* PS and related bits will be programmed according to the speed */ - if (priv->hw->pcs) { - int speed = priv->plat->mac_port_sel_speed; - - if ((speed == SPEED_10) || (speed == SPEED_100) || - (speed == SPEED_1000)) { - priv->hw->ps = speed; - } else { - dev_warn(priv->device, "invalid port speed\n"); - priv->hw->ps = 0; - } - } - /* Initialize the MAC Core */ stmmac_core_init(priv, priv->hw, dev); From 412d5f32cb36110475d1c77e291b73e97f2f000a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:27 +0100 Subject: [PATCH 138/867] net: stmmac: simplify stmmac_check_pcs_mode() Now that we only support one mode, simplify stmmac_check_pcs_mode(). 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6d-0000000Aolw-1T7d@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9a8dacf18c14c..832062b2f4f31 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1086,22 +1086,23 @@ static const struct phylink_mac_ops stmmac_phylink_mac_ops = { static void stmmac_check_pcs_mode(struct stmmac_priv *priv) { int interface = priv->plat->phy_interface; + int speed = priv->plat->mac_port_sel_speed; if (priv->dma_cap.pcs && interface == PHY_INTERFACE_MODE_SGMII) { netdev_dbg(priv->dev, "PCS SGMII support enabled\n"); priv->hw->pcs = STMMAC_PCS_SGMII; - } - - /* PS and related bits will be programmed according to the speed */ - if (priv->hw->pcs) { - int speed = priv->plat->mac_port_sel_speed; - if ((speed == SPEED_10) || (speed == SPEED_100) || - (speed == SPEED_1000)) { + switch (speed) { + case SPEED_10: + case SPEED_100: + case SPEED_1000: priv->hw->ps = speed; - } else { + break; + + default: dev_warn(priv->device, "invalid port speed\n"); priv->hw->ps = 0; + break; } } } From 5d1e7621f869a4d9d1da1652eb2e944784d868d8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:32 +0100 Subject: [PATCH 139/867] net: stmmac: hw->ps becomes hw->reverse_sgmii_enable After a lot of digging, it seems that the oddly named hw->ps member is all about setting the core into reverse SGMII speed. When set to a non-zero value, it: 1. Configures the MAC at initialisation time to operate at a specific speed. 2. 
It _incorrectly_ enables the transmitter (GMAC_CONFIG_TE) which makes no sense, rather than enabling the "transmit configuration" bit (GMAC_CONFIG_TC). 3. It configures the SGMII rate adapter layer to retrieve its speed setting from the MAC configuration register rather than the PHY. In the previous commit, we removed (1) and (2) as phylink overwrites the configuration set at that step. Thus, the only functional aspect is (3), which is a boolean operation. This means there is no need to store the actual speed; a boolean flag is sufficient. Convert the hw->ps member to a boolean, and rename it to hw->reverse_sgmii_enable to make it more understandable. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6i-0000000Aom3-1y2y@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index ed5e207ffdba8..fee7021246b1a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -599,13 +599,13 @@ struct mac_device_info { unsigned int mcast_bits_log2; unsigned int rx_csum; unsigned int pcs; - unsigned int ps; unsigned int xlgmac; unsigned int num_vlan; u32 vlan_filter[32]; bool vlan_fail_q_en; u8 vlan_fail_q; bool hw_vlan_en; + bool reverse_sgmii_enable; }; struct stmmac_rx_routing { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 832062b2f4f31..c4d749396b53e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1096,12 +1096,12 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) case SPEED_10: case 
SPEED_100: case SPEED_1000: - priv->hw->ps = speed; + priv->hw->reverse_sgmii_enable = true; break; default: dev_warn(priv->device, "invalid port speed\n"); - priv->hw->ps = 0; + priv->hw->reverse_sgmii_enable = false; break; } } @@ -3486,7 +3486,7 @@ static int stmmac_hw_setup(struct net_device *dev) } if (priv->hw->pcs) - stmmac_pcs_ctrl_ane(priv, 1, priv->hw->ps); + stmmac_pcs_ctrl_ane(priv, 1, priv->hw->reverse_sgmii_enable); /* set TX and RX rings length */ stmmac_set_rings_length(priv); From 5c61db08d9aeb960691fae16cfdb23df1355987e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:37 +0100 Subject: [PATCH 140/867] net: stmmac: do not require snps,ps-speed for SGMII SGMII mode does not require port-speed to be specified; this only switches SGMII to use the MAC configuration register speed settings and the actual value is irrelevant when the link comes up. As it seems the intention was to support "reverse SGMII" with this setting, but the code didn't actually configure that due to a typo, the warning and bad DT binding documentation has led people to specify snps,ps-speed in their DT files inappropriately. If mac_port_sel_speed is zero, then don't complain that the speed is invalid, as this means we're using "normal" SGMII. This does _not_ obsolete snps,ps-speed, nor does it change the behaviour of that property, with the exception of not making people mistakenly believe that they need to specify this option to use normal SGMII. There is no need to modify the binding. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6n-0000000Aom9-2LuZ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c4d749396b53e..c643817f30da6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1101,6 +1101,8 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) default: dev_warn(priv->device, "invalid port speed\n"); + fallthrough; + case 0: priv->hw->reverse_sgmii_enable = false; break; } From 045d7e5727c451f310f0de72ec1452274167431f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:42 +0100 Subject: [PATCH 141/867] net: stmmac: only call stmmac_pcs_ctrl_ane() for integrated SGMII PCS The internal PCS registers only exist if the core is synthesized with SGMII, TBI or RTBI support. They have no relevance for RGMII. However, priv->hw->pcs contains a STMMAC_PCS_RGMII flag, which is set if a PCS has been synthesized but we are operating in RGMII mode. As the register has no effect for RGMII, there is no point calling stmmac_pcs_ctrl_ane() in this case. Add a comment describing this and make it conditional on STMMAC_PCS_SGMII. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6s-0000000AomE-2pAa@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c643817f30da6..02dcbfa7d23e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3487,7 +3487,11 @@ static int stmmac_hw_setup(struct net_device *dev) } } - if (priv->hw->pcs) + /* The PCS control register is only relevant for SGMII, TBI and RTBI + * modes. We no longer support TBI or RTBI, so only configure this + * register when operating in SGMII mode with the integrated PCS. + */ + if (priv->hw->pcs & STMMAC_PCS_SGMII) stmmac_pcs_ctrl_ane(priv, 1, priv->hw->reverse_sgmii_enable); /* set TX and RX rings length */ From 237e54caeaeff5693c1fc2c04737ee5fe77a8bcd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:47 +0100 Subject: [PATCH 142/867] net: stmmac: provide PCS initialisation hook dwmac cores provide a feature bit to indicate when the PCS block is present, but features are only read after the core's setup() function has been called, meaning we can't decide whether to initialise the integrated PCS in the setup function. Provide a new MAC core hook for PCS initialisation, which will be called after the feature registers have been read. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P6x-0000000AomL-3OKd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 ++++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 7796f5f3c96f1..82cfb6bec334a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -313,6 +313,8 @@ enum stmmac_lpi_mode { /* Helpers to program the MAC core */ struct stmmac_ops { + /* Initialise any PCS instances */ + int (*pcs_init)(struct stmmac_priv *priv); /* MAC core initialization */ void (*core_init)(struct mac_device_info *hw, struct net_device *dev); /* Update MAC capabilities */ @@ -413,6 +415,8 @@ struct stmmac_ops { u32 pclass); }; +#define stmmac_mac_pcs_init(__priv) \ + stmmac_do_callback(__priv, mac, pcs_init, __priv) #define stmmac_core_init(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, core_init, __args) #define stmmac_mac_update_caps(__priv) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 02dcbfa7d23e8..c041268d3a8d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7239,6 +7239,13 @@ static int stmmac_hw_init(struct stmmac_priv *priv) "Enable RX Mitigation via HW Watchdog Timer\n"); } + /* Unimplemented PCS init (as indicated by stmmac_do_callback() + * perversely returning -EINVAL) is non-fatal. 
+ */ + ret = stmmac_mac_pcs_init(priv); + if (ret != -EINVAL) + return ret; + return 0; } From 2c81f3357136a0ece9b36be991c78a384399eaac Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 16 Oct 2025 15:37:52 +0100 Subject: [PATCH 143/867] net: stmmac: convert to phylink PCS support Now that stmmac's PCS support is much simpler - just a matter of configuring the control register - the basic conversion to phylink PCS support becomes straightforward. Create the infrastructure to set up a phylink_pcs instance for the integrated PCS: - add a struct stmmac_pcs to encapsulate the phylink_pcs structure, pointer to stmmac_priv, and the core-specific base address of the PCS registers. - modify stmmac_priv and stmmac_mac_select_pcs() to return the embedded phylink_pcs structure when setup and STMMAC_PCS_SGMII is in use, and move the comment from stmmac_hw_setup() to here. - create stmmac_pcs.c, which contains the phylink_pcs_ops structure, a dummy .pcs_get_state() method which always reports link-down, and .pcs_config() method, moving the call to stmmac_pcs_ctrl_ane() here, but without indirecting through the dwmac specific core code. The link-down behaviour mentioned above maintains the current behaviour when phylink is used with inband but without a PCS. This will ensure that the PCS control register is configured to the same settings as before, but will now happen when the netdev is opened or resumed rather than only during probe time. However, this will be before the .fix_mac_speed() method is called, which is critical for the behaviour in dwmac-qcom-ethqos's ethqos_configure_sgmii() function to be maintained. 
Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Tested-by: Lad Prabhakar Link: https://patch.msgid.link/E1v9P72-0000000AomR-3ro4@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Makefile | 2 +- .../ethernet/stmicro/stmmac/dwmac1000_core.c | 9 ++++ .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 11 +++++ drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +++--- .../net/ethernet/stmicro/stmmac/stmmac_pcs.c | 47 +++++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_pcs.h | 17 +++++++ 7 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index ec56bcf2db621..1681a8a283135 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -7,7 +7,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ stmmac_xdp.o stmmac_est.o stmmac_fpe.o stmmac_vlan.o \ - $(stmmac-y) + stmmac_pcs.o $(stmmac-y) stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index d35db8958be15..571e483624442 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -22,6 +22,14 @@ #include "stmmac_ptp.h" #include "dwmac1000.h" +static int dwmac1000_pcs_init(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.pcs) + return 0; + + return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE); +} + static void dwmac1000_core_init(struct mac_device_info *hw, struct net_device *dev) { @@ -435,6 +443,7 @@ static void dwmac1000_set_mac_loopback(void 
__iomem *ioaddr, bool enable) } const struct stmmac_ops dwmac1000_ops = { + .pcs_init = dwmac1000_pcs_init, .core_init = dwmac1000_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac1000_rx_ipc_enable, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index d855ab6b91458..0b785389b7eff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -22,6 +22,14 @@ #include "dwmac4.h" #include "dwmac5.h" +static int dwmac4_pcs_init(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.pcs) + return 0; + + return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE); +} + static void dwmac4_core_init(struct mac_device_info *hw, struct net_device *dev) { @@ -875,6 +883,7 @@ static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, } const struct stmmac_ops dwmac4_ops = { + .pcs_init = dwmac4_pcs_init, .core_init = dwmac4_core_init, .update_caps = dwmac4_update_caps, .set_mac = stmmac_set_mac, @@ -909,6 +918,7 @@ const struct stmmac_ops dwmac4_ops = { }; const struct stmmac_ops dwmac410_ops = { + .pcs_init = dwmac4_pcs_init, .core_init = dwmac4_core_init, .update_caps = dwmac4_update_caps, .set_mac = stmmac_dwmac4_set_mac, @@ -945,6 +955,7 @@ const struct stmmac_ops dwmac410_ops = { }; const struct stmmac_ops dwmac510_ops = { + .pcs_init = dwmac4_pcs_init, .core_init = dwmac4_core_init, .update_caps = dwmac4_update_caps, .set_mac = stmmac_dwmac4_set_mac, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index ec8bddc1c37fe..aaeaf42084f0d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -25,6 +25,8 @@ #include #include +struct stmmac_pcs; + struct stmmac_resources { void __iomem *addr; u8 mac[ETH_ALEN]; @@ -273,6 +275,8 @@ struct stmmac_priv { unsigned int pause_time; struct mii_bus *mii; + struct stmmac_pcs 
*integrated_pcs; + struct phylink_config phylink_config; struct phylink *phylink; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c041268d3a8d6..5e6aaead58946 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -46,6 +46,7 @@ #include "stmmac_ptp.h" #include "stmmac_fpe.h" #include "stmmac.h" +#include "stmmac_pcs.h" #include "stmmac_xdp.h" #include #include @@ -850,6 +851,13 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, return pcs; } + /* The PCS control register is only relevant for SGMII, TBI and RTBI + * modes. We no longer support TBI or RTBI, so only configure this + * register when operating in SGMII mode with the integrated PCS. + */ + if (priv->hw->pcs & STMMAC_PCS_SGMII && priv->integrated_pcs) + return &priv->integrated_pcs->pcs; + return NULL; } @@ -3487,13 +3495,6 @@ static int stmmac_hw_setup(struct net_device *dev) } } - /* The PCS control register is only relevant for SGMII, TBI and RTBI - * modes. We no longer support TBI or RTBI, so only configure this - * register when operating in SGMII mode with the integrated PCS. 
- */ - if (priv->hw->pcs & STMMAC_PCS_SGMII) - stmmac_pcs_ctrl_ane(priv, 1, priv->hw->reverse_sgmii_enable); - /* set TX and RX rings length */ stmmac_set_rings_length(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c new file mode 100644 index 0000000000000..50ea51d7a1cc8 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "stmmac.h" +#include "stmmac_pcs.h" + +static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs, + unsigned int neg_mode, + struct phylink_link_state *state) +{ + state->link = false; +} + +static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs, + unsigned int neg_mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + + dwmac_ctrl_ane(spcs->base, 0, 1, spcs->priv->hw->reverse_sgmii_enable); + + return 0; +} + +static const struct phylink_pcs_ops dwmac_integrated_pcs_ops = { + .pcs_get_state = dwmac_integrated_pcs_get_state, + .pcs_config = dwmac_integrated_pcs_config, +}; + +int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset) +{ + struct stmmac_pcs *spcs; + + spcs = devm_kzalloc(priv->device, sizeof(*spcs), GFP_KERNEL); + if (!spcs) + return -ENOMEM; + + spcs->priv = priv; + spcs->base = priv->ioaddr + offset; + spcs->pcs.ops = &dwmac_integrated_pcs_ops; + + __set_bit(PHY_INTERFACE_MODE_SGMII, spcs->pcs.supported_interfaces); + + priv->integrated_pcs = spcs; + + return 0; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h index 5778f5b2f3139..64397ac8ecab8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h @@ -9,6 +9,7 @@ #ifndef __STMMAC_PCS_H__ #define __STMMAC_PCS_H__ +#include <linux/phylink.h> #include <linux/slab.h> #include <linux/io.h> #include
"common.h" @@ -46,6 +47,22 @@ #define GMAC_ANE_RFE_SHIFT 12 #define GMAC_ANE_ACK BIT(14) +struct stmmac_priv; + +struct stmmac_pcs { + struct stmmac_priv *priv; + void __iomem *base; + struct phylink_pcs pcs; +}; + +static inline struct stmmac_pcs * +phylink_pcs_to_stmmac_pcs(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct stmmac_pcs, pcs); +} + +int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset); + /** * dwmac_pcs_isr - TBI, RTBI, or SGMII PHY ISR * @ioaddr: IO registers pointer From ed5d5928bd54f66af19b71ad342ebf0947d50674 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:09 +0000 Subject: [PATCH 144/867] net: ti: am65-cpsw: move hw timestamping to ndo callback Migrate driver to new API for HW timestamping. Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251016152515.3510991-2-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 44 +++++++++++------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 110eb2da8dbc1..d5f358ec98205 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1788,28 +1788,28 @@ static int am65_cpsw_nuss_ndo_slave_set_mac_address(struct net_device *ndev, } static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, - struct ifreq *ifr) + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); u32 ts_ctrl, seq_id, ts_ctrl_ltype2, ts_vlan_ltype; - struct hwtstamp_config cfg; - if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)) + if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)) { + NL_SET_ERR_MSG(extack, "Time stamping is not supported"); return -EOPNOTSUPP; - - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; + } /* TX HW 
timestamp */ - switch (cfg.tx_type) { + switch (cfg->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: break; default: + NL_SET_ERR_MSG(extack, "TX mode is not supported"); return -ERANGE; } - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: port->rx_ts_enabled = false; break; @@ -1826,17 +1826,19 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: port->rx_ts_enabled = true; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT | HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT | HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: case HWTSTAMP_FILTER_NTP_ALL: + NL_SET_ERR_MSG(extack, "RX filter is not supported"); return -EOPNOTSUPP; default: + NL_SET_ERR_MSG(extack, "RX filter is not supported"); return -ERANGE; } - port->tx_ts_enabled = (cfg.tx_type == HWTSTAMP_TX_ON); + port->tx_ts_enabled = (cfg->tx_type == HWTSTAMP_TX_ON); /* cfg TX timestamp */ seq_id = (AM65_CPSW_TS_SEQ_ID_OFFSET << @@ -1872,25 +1874,24 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2); writel(ts_ctrl, port->port_base + AM65_CPSW_PORTN_REG_TS_CTL); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev, - struct ifreq *ifr) + struct kernel_hwtstamp_config *cfg) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); - struct hwtstamp_config cfg; if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)) return -EOPNOTSUPP; - cfg.flags = 0; - cfg.tx_type = port->tx_ts_enabled ? + cfg->flags = 0; + cfg->tx_type = port->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - cfg.rx_filter = port->rx_ts_enabled ? HWTSTAMP_FILTER_PTP_V2_EVENT | + cfg->rx_filter = port->rx_ts_enabled ? 
HWTSTAMP_FILTER_PTP_V2_EVENT | HWTSTAMP_FILTER_PTP_V1_L4_EVENT : HWTSTAMP_FILTER_NONE; - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int am65_cpsw_nuss_ndo_slave_ioctl(struct net_device *ndev, @@ -1901,13 +1902,6 @@ static int am65_cpsw_nuss_ndo_slave_ioctl(struct net_device *ndev, if (!netif_running(ndev)) return -EINVAL; - switch (cmd) { - case SIOCSHWTSTAMP: - return am65_cpsw_nuss_hwtstamp_set(ndev, req); - case SIOCGHWTSTAMP: - return am65_cpsw_nuss_hwtstamp_get(ndev, req); - } - return phylink_mii_ioctl(port->slave.phylink, req, cmd); } @@ -1991,6 +1985,8 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_set_tx_maxrate = am65_cpsw_qos_ndo_tx_p0_set_maxrate, .ndo_bpf = am65_cpsw_ndo_bpf, .ndo_xdp_xmit = am65_cpsw_ndo_xdp_xmit, + .ndo_hwtstamp_get = am65_cpsw_nuss_hwtstamp_get, + .ndo_hwtstamp_set = am65_cpsw_nuss_hwtstamp_set, }; static void am65_cpsw_disable_phy(struct phy *phy) From b8fa98ea4a22d949e92ebc755abc5e633db88bf9 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:10 +0000 Subject: [PATCH 145/867] ti: icssg: convert to ndo_hwtstamp API Convert driver to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() API. .ndo_eth_ioctl() implementation becomes pure phy_do_ioctl(), remove it from common module, remove exported symbol and replace ndo callback. 
Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251016152515.3510991-3-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 47 ++++++------------- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 4 +- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 6 ++- .../net/ethernet/ti/icssg/icssg_prueth_sr1.c | 4 +- 4 files changed, 26 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 57e5f1c88f509..0eed29d6187a6 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -1223,15 +1223,13 @@ void icssg_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) } EXPORT_SYMBOL_GPL(icssg_ndo_tx_timeout); -static int emac_set_ts_config(struct net_device *ndev, struct ifreq *ifr) +int icssg_ndo_set_ts_config(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct prueth_emac *emac = netdev_priv(ndev); - struct hwtstamp_config config; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: emac->tx_ts_enabled = 0; break; @@ -1242,7 +1240,7 @@ static int emac_set_ts_config(struct net_device *ndev, struct ifreq *ifr) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: emac->rx_ts_enabled = 0; break; @@ -1262,43 +1260,28 @@ static int emac_set_ts_config(struct net_device *ndev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: emac->rx_ts_enabled = 1; - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; + return 0; } +EXPORT_SYMBOL_GPL(icssg_ndo_set_ts_config); -static int emac_get_ts_config(struct net_device *ndev, struct ifreq *ifr) +int icssg_ndo_get_ts_config(struct net_device *ndev, + struct kernel_hwtstamp_config *config) { struct prueth_emac *emac = netdev_priv(ndev); - struct hwtstamp_config config; - - config.flags = 0; - config.tx_type = emac->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - config.rx_filter = emac->rx_ts_enabled ? HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} -int icssg_ndo_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCGHWTSTAMP: - return emac_get_ts_config(ndev, ifr); - case SIOCSHWTSTAMP: - return emac_set_ts_config(ndev, ifr); - default: - break; - } + config->flags = 0; + config->tx_type = emac->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + config->rx_filter = emac->rx_ts_enabled ? HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; - return phy_do_ioctl(ndev, ifr, cmd); + return 0; } -EXPORT_SYMBOL_GPL(icssg_ndo_ioctl); +EXPORT_SYMBOL_GPL(icssg_ndo_get_ts_config); void icssg_ndo_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index e42d0fdefee12..1c1f4394ff1f2 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1168,7 +1168,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = icssg_ndo_tx_timeout, .ndo_set_rx_mode = emac_ndo_set_rx_mode, - .ndo_eth_ioctl = icssg_ndo_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_get_stats64 = icssg_ndo_get_stats64, .ndo_get_phys_port_name = icssg_ndo_get_phys_port_name, .ndo_fix_features = emac_ndo_fix_features, @@ -1176,6 +1176,8 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_vlan_rx_kill_vid = 
emac_ndo_vlan_rx_del_vid, .ndo_bpf = emac_ndo_bpf, .ndo_xdp_xmit = emac_xdp_xmit, + .ndo_hwtstamp_get = icssg_ndo_get_ts_config, + .ndo_hwtstamp_set = icssg_ndo_set_ts_config, }; static int prueth_netdev_init(struct prueth *prueth, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index ca8a22a4a5daa..f0fa9688d9a08 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -479,7 +479,11 @@ void prueth_reset_tx_chan(struct prueth_emac *emac, int ch_num, void prueth_reset_rx_chan(struct prueth_rx_chn *chn, int num_flows, bool disable); void icssg_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); -int icssg_ndo_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd); +int icssg_ndo_get_ts_config(struct net_device *ndev, + struct kernel_hwtstamp_config *config); +int icssg_ndo_set_ts_config(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void icssg_ndo_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats); int icssg_ndo_get_phys_port_name(struct net_device *ndev, char *name, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c index 5e225310c9dea..2a8c8847a6bd0 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c @@ -747,9 +747,11 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = icssg_ndo_tx_timeout, .ndo_set_rx_mode = emac_ndo_set_rx_mode_sr1, - .ndo_eth_ioctl = icssg_ndo_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_get_stats64 = icssg_ndo_get_stats64, .ndo_get_phys_port_name = icssg_ndo_get_phys_port_name, + .ndo_hwtstamp_get = icssg_ndo_get_ts_config, + .ndo_hwtstamp_set = icssg_ndo_set_ts_config, }; static int prueth_netdev_init(struct prueth *prueth, From 
149cfae71166db1051e1160f4f86ad3ef5a458ae Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:11 +0000 Subject: [PATCH 146/867] amd-xgbe: convert to ndo_hwtstamp callbacks Convert driver to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. .ndo_eth_ioctl() becomes empty function, remove it. Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251016152515.3510991-4-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 24 ++-------------- drivers/net/ethernet/amd/xgbe/xgbe-hwtstamp.c | 28 +++++++++---------- drivers/net/ethernet/amd/xgbe/xgbe.h | 11 ++++---- 3 files changed, 21 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 4dc631af79332..f3adf29b222bf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1754,27 +1754,6 @@ static int xgbe_set_mac_address(struct net_device *netdev, void *addr) return 0; } -static int xgbe_ioctl(struct net_device *netdev, struct ifreq *ifreq, int cmd) -{ - struct xgbe_prv_data *pdata = netdev_priv(netdev); - int ret; - - switch (cmd) { - case SIOCGHWTSTAMP: - ret = xgbe_get_hwtstamp_settings(pdata, ifreq); - break; - - case SIOCSHWTSTAMP: - ret = xgbe_set_hwtstamp_settings(pdata, ifreq); - break; - - default: - ret = -EOPNOTSUPP; - } - - return ret; -} - static int xgbe_change_mtu(struct net_device *netdev, int mtu) { struct xgbe_prv_data *pdata = netdev_priv(netdev); @@ -2020,7 +1999,6 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_set_rx_mode = xgbe_set_rx_mode, .ndo_set_mac_address = xgbe_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_eth_ioctl = xgbe_ioctl, .ndo_change_mtu = xgbe_change_mtu, .ndo_tx_timeout = xgbe_tx_timeout, .ndo_get_stats64 = xgbe_get_stats64, @@ -2033,6 +2011,8 @@ static const struct net_device_ops 
xgbe_netdev_ops = { .ndo_fix_features = xgbe_fix_features, .ndo_set_features = xgbe_set_features, .ndo_features_check = xgbe_features_check, + .ndo_hwtstamp_get = xgbe_get_hwtstamp_settings, + .ndo_hwtstamp_set = xgbe_set_hwtstamp_settings, }; const struct net_device_ops *xgbe_get_netdev_ops(void) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-hwtstamp.c b/drivers/net/ethernet/amd/xgbe/xgbe-hwtstamp.c index bc52e5ec64205..0127988e10bee 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-hwtstamp.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-hwtstamp.c @@ -157,26 +157,24 @@ void xgbe_tx_tstamp(struct work_struct *work) spin_unlock_irqrestore(&pdata->tstamp_lock, flags); } -int xgbe_get_hwtstamp_settings(struct xgbe_prv_data *pdata, struct ifreq *ifreq) +int xgbe_get_hwtstamp_settings(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - if (copy_to_user(ifreq->ifr_data, &pdata->tstamp_config, - sizeof(pdata->tstamp_config))) - return -EFAULT; + struct xgbe_prv_data *pdata = netdev_priv(netdev); + + *config = pdata->tstamp_config; return 0; } -int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata, struct ifreq *ifreq) +int xgbe_set_hwtstamp_settings(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; - unsigned int mac_tscr; - - if (copy_from_user(&config, ifreq->ifr_data, sizeof(config))) - return -EFAULT; - - mac_tscr = 0; + struct xgbe_prv_data *pdata = netdev_priv(netdev); + unsigned int mac_tscr = 0; - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: break; @@ -188,7 +186,7 @@ int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata, struct ifreq *ifreq) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: break; @@ -290,7 +288,7 @@ int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata, struct ifreq *ifreq) xgbe_config_tstamp(pdata, mac_tscr); - memcpy(&pdata->tstamp_config, 
&config, sizeof(config)); + pdata->tstamp_config = *config; return 0; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index e8bbb68059013..381f72a33d1af 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1146,7 +1146,7 @@ struct xgbe_prv_data { spinlock_t tstamp_lock; struct ptp_clock_info ptp_clock_info; struct ptp_clock *ptp_clock; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; unsigned int tstamp_addend; struct work_struct tx_tstamp_work; struct sk_buff *tx_tstamp_skb; @@ -1307,10 +1307,11 @@ void xgbe_update_tstamp_addend(struct xgbe_prv_data *pdata, void xgbe_set_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec, unsigned int nsec); void xgbe_tx_tstamp(struct work_struct *work); -int xgbe_get_hwtstamp_settings(struct xgbe_prv_data *pdata, - struct ifreq *ifreq); -int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata, - struct ifreq *ifreq); +int xgbe_get_hwtstamp_settings(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int xgbe_set_hwtstamp_settings(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void xgbe_prep_tx_tstamp(struct xgbe_prv_data *pdata, struct sk_buff *skb, struct xgbe_packet_data *packet); From 8a15a84e80dcf838062971f32fae653906b131f4 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:12 +0000 Subject: [PATCH 147/867] net: atlantic: convert to ndo_hwtstamp API Convert driver to .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. .ndo_eth_ioctl() becomes empty so remove it. Also simplify code with no functional changes. 
Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251016152515.3510991-5-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/aquantia/atlantic/aq_main.c | 66 +++++-------------- .../net/ethernet/aquantia/atlantic/aq_ptp.c | 6 +- .../net/ethernet/aquantia/atlantic/aq_ptp.h | 8 +-- 3 files changed, 22 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index b565189e59139..4ef4fe64b8ac1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -258,10 +258,15 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev) (void)aq_nic_set_multicast_list(aq_nic, ndev); } -#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) -static int aq_ndev_config_hwtstamp(struct aq_nic_s *aq_nic, - struct hwtstamp_config *config) +static int aq_ndev_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { + struct aq_nic_s *aq_nic = netdev_priv(netdev); + + if (!IS_REACHABLE(CONFIG_PTP_1588_CLOCK) || !aq_nic->aq_ptp) + return -EOPNOTSUPP; + switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: @@ -290,59 +295,17 @@ static int aq_ndev_config_hwtstamp(struct aq_nic_s *aq_nic, return aq_ptp_hwtstamp_config_set(aq_nic->aq_ptp, config); } -#endif - -static int aq_ndev_hwtstamp_set(struct aq_nic_s *aq_nic, struct ifreq *ifr) -{ - struct hwtstamp_config config; -#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) - int ret_val; -#endif - - if (!aq_nic->aq_ptp) - return -EOPNOTSUPP; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; -#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) - ret_val = aq_ndev_config_hwtstamp(aq_nic, &config); - if (ret_val) - return ret_val; -#endif - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; -} -#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) -static int aq_ndev_hwtstamp_get(struct aq_nic_s *aq_nic, struct ifreq *ifr) +static int aq_ndev_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; + struct aq_nic_s *aq_nic = netdev_priv(netdev); if (!aq_nic->aq_ptp) return -EOPNOTSUPP; - aq_ptp_hwtstamp_config_get(aq_nic->aq_ptp, &config); - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} -#endif - -static int aq_ndev_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct aq_nic_s *aq_nic = netdev_priv(netdev); - - switch (cmd) { - case SIOCSHWTSTAMP: - return aq_ndev_hwtstamp_set(aq_nic, ifr); - -#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) - case SIOCGHWTSTAMP: - return aq_ndev_hwtstamp_get(aq_nic, ifr); -#endif - } - - return -EOPNOTSUPP; + aq_ptp_hwtstamp_config_get(aq_nic->aq_ptp, config); + return 0; } static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, @@ -500,12 +463,13 @@ static const struct net_device_ops aq_ndev_ops = { .ndo_set_mac_address = aq_ndev_set_mac_address, .ndo_set_features = aq_ndev_set_features, .ndo_fix_features = aq_ndev_fix_features, - .ndo_eth_ioctl = aq_ndev_ioctl, .ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid, .ndo_setup_tc = aq_ndo_setup_tc, .ndo_bpf = aq_xdp, .ndo_xdp_xmit = aq_xdp_xmit, + .ndo_hwtstamp_get = aq_ndev_hwtstamp_get, + .ndo_hwtstamp_set = aq_ndev_hwtstamp_set, }; static int __init aq_ndev_init_module(void) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 5acb3e16b5677..0fa0f891c0e03 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -51,7 +51,7 @@ struct ptp_tx_timeout { struct aq_ptp_s { struct aq_nic_s *aq_nic; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config 
hwtstamp_config; spinlock_t ptp_lock; spinlock_t ptp_ring_lock; struct ptp_clock *ptp_clock; @@ -567,7 +567,7 @@ static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct skb_shared_hwtsta } void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { *config = aq_ptp->hwtstamp_config; } @@ -588,7 +588,7 @@ static void aq_ptp_prepare_filters(struct aq_ptp_s *aq_ptp) } int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct aq_nic_s *aq_nic = aq_ptp->aq_nic; const struct aq_hw_ops *hw_ops; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h index 210b723f22072..5e643ec7cc06a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h @@ -60,9 +60,9 @@ void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp); /* Must be to check available of PTP before call */ void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); /* Return either ring is belong to PTP or not*/ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring); @@ -130,9 +130,9 @@ static inline int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb) static inline void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp) {} static inline void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp, - struct hwtstamp_config *config) {} + struct kernel_hwtstamp_config *config) {} static inline int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return 0; } From a6a64bb4115f15367573235292ef23a8c60f5f17 Mon Sep 17 00:00:00 2001 From: 
Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:13 +0000 Subject: [PATCH 148/867] cxgb4: convert to ndo_hwtstamp API Convert to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. There is some change in the logic as well. Previously, the driver was storing newly requested configuration regardless of whether it was applied or not. In case of request validation error, inconsistent configuration would be returned by the driver. New logic stores configuration only if it was properly validated and applied. It brings consistency between reported and actual configuration. Signed-off-by: Vadim Fedorenko Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251016152515.3510991-6-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 154 +++++++++--------- 2 files changed, 79 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0d85198fb03df..f20f4bc58492b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -674,7 +674,7 @@ struct port_info { struct cxgb_fcoe fcoe; #endif /* CONFIG_CHELSIO_T4_FCOE */ bool rxtstamp; /* Enable TS */ - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; bool ptp_enable; struct sched_table *sched_tbl; u32 eth_flags; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 392723ef14e51..7e2283c95b97d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3042,12 +3042,87 @@ static void cxgb_get_stats(struct net_device *dev, ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors; } +static int cxgb_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) +{ + struct port_info *pi = netdev_priv(dev); + + *config = 
pi->tstamp_config; + return 0; +} + +static int cxgb_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; + + if (is_t4(adapter->params.chip)) { + /* For T4 Adapters */ + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + pi->rxtstamp = false; + break; + case HWTSTAMP_FILTER_ALL: + pi->rxtstamp = true; + break; + default: + return -ERANGE; + } + pi->tstamp_config = *config; + return 0; + } + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + pi->rxtstamp = false; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + cxgb4_ptprx_timestamping(pi, pi->port_id, PTP_TS_L4); + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + cxgb4_ptprx_timestamping(pi, pi->port_id, PTP_TS_L2_L4); + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + pi->rxtstamp = true; + break; + default: + return -ERANGE; + } + + if (config->tx_type == HWTSTAMP_TX_OFF && + config->rx_filter == HWTSTAMP_FILTER_NONE) { + if (cxgb4_ptp_txtype(adapter, pi->port_id) >= 0) + pi->ptp_enable = false; + } + + if (config->rx_filter != HWTSTAMP_FILTER_NONE) { + if (cxgb4_ptp_redirect_rx_packet(adapter, pi) >= 0) + pi->ptp_enable = true; + } + pi->tstamp_config = *config; + return 0; +} + static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { unsigned int mbox; int ret = 0, prtad, devad; struct port_info *pi = netdev_priv(dev); - struct adapter *adapter = pi->adapter; struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data; switch (cmd) { @@ -3076,81 +3151,6 @@ static int cxgb_ioctl(struct net_device *dev, struct 
ifreq *req, int cmd) ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad, data->reg_num, data->val_in); break; - case SIOCGHWTSTAMP: - return copy_to_user(req->ifr_data, &pi->tstamp_config, - sizeof(pi->tstamp_config)) ? - -EFAULT : 0; - case SIOCSHWTSTAMP: - if (copy_from_user(&pi->tstamp_config, req->ifr_data, - sizeof(pi->tstamp_config))) - return -EFAULT; - - if (!is_t4(adapter->params.chip)) { - switch (pi->tstamp_config.tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (pi->tstamp_config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - pi->rxtstamp = false; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - cxgb4_ptprx_timestamping(pi, pi->port_id, - PTP_TS_L4); - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - cxgb4_ptprx_timestamping(pi, pi->port_id, - PTP_TS_L2_L4); - break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - pi->rxtstamp = true; - break; - default: - pi->tstamp_config.rx_filter = - HWTSTAMP_FILTER_NONE; - return -ERANGE; - } - - if ((pi->tstamp_config.tx_type == HWTSTAMP_TX_OFF) && - (pi->tstamp_config.rx_filter == - HWTSTAMP_FILTER_NONE)) { - if (cxgb4_ptp_txtype(adapter, pi->port_id) >= 0) - pi->ptp_enable = false; - } - - if (pi->tstamp_config.rx_filter != - HWTSTAMP_FILTER_NONE) { - if (cxgb4_ptp_redirect_rx_packet(adapter, - pi) >= 0) - pi->ptp_enable = true; - } - } else { - /* For T4 Adapters */ - switch (pi->tstamp_config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - pi->rxtstamp = false; - break; - case HWTSTAMP_FILTER_ALL: - pi->rxtstamp = true; - break; - default: - pi->tstamp_config.rx_filter = - HWTSTAMP_FILTER_NONE; - return -ERANGE; - } - } - return copy_to_user(req->ifr_data, &pi->tstamp_config, - sizeof(pi->tstamp_config)) ? 
-EFAULT : 0; default: return -EOPNOTSUPP; } @@ -3875,6 +3875,8 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_setup_tc = cxgb_setup_tc, .ndo_features_check = cxgb_features_check, .ndo_fix_features = cxgb_fix_features, + .ndo_hwtstamp_get = cxgb_hwtstamp_get, + .ndo_hwtstamp_set = cxgb_hwtstamp_set, }; #ifdef CONFIG_PCI_IOV From d8db98db0d463299b891f9144b6e8924da051150 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:14 +0000 Subject: [PATCH 149/867] tsnep: convert to ndo_hwtstamp API Convert to .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. After conversions the rest of tsnep_netdev_ioctl() becomes pure phy_do_ioctl_running(), so remove tsnep_netdev_ioctl() and replace it with phy_do_ioctl_running() in .ndo_eth_ioctl. Reviewed-by: Simon Horman Signed-off-by: Vadim Fedorenko Reviewed-by: Jacob Keller Reviewed-by: Gerhard Engleder Link: https://patch.msgid.link/20251016152515.3510991-7-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/engleder/tsnep.h | 8 +- drivers/net/ethernet/engleder/tsnep_main.c | 14 +--- drivers/net/ethernet/engleder/tsnep_ptp.c | 88 +++++++++++----------- 3 files changed, 51 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h index f188fba021a62..03e19aea9ea48 100644 --- a/drivers/net/ethernet/engleder/tsnep.h +++ b/drivers/net/ethernet/engleder/tsnep.h @@ -176,7 +176,7 @@ struct tsnep_adapter { struct tsnep_gcl gcl[2]; int next_gcl; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; /* ptp clock lock */ @@ -203,7 +203,11 @@ extern const struct ethtool_ops tsnep_ethtool_ops; int tsnep_ptp_init(struct tsnep_adapter *adapter); void tsnep_ptp_cleanup(struct tsnep_adapter *adapter); -int tsnep_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); +int tsnep_ptp_hwtstamp_get(struct 
net_device *netdev, + struct kernel_hwtstamp_config *config); +int tsnep_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int tsnep_tc_init(struct tsnep_adapter *adapter); void tsnep_tc_cleanup(struct tsnep_adapter *adapter); diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index eba73246f9866..b118407c30e87 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -2168,16 +2168,6 @@ static netdev_tx_t tsnep_netdev_xmit_frame(struct sk_buff *skb, return tsnep_xmit_frame_ring(skb, &adapter->tx[queue_mapping]); } -static int tsnep_netdev_ioctl(struct net_device *netdev, struct ifreq *ifr, - int cmd) -{ - if (!netif_running(netdev)) - return -EINVAL; - if (cmd == SIOCSHWTSTAMP || cmd == SIOCGHWTSTAMP) - return tsnep_ptp_ioctl(netdev, ifr, cmd); - return phy_mii_ioctl(netdev->phydev, ifr, cmd); -} - static void tsnep_netdev_set_multicast(struct net_device *netdev) { struct tsnep_adapter *adapter = netdev_priv(netdev); @@ -2384,7 +2374,7 @@ static const struct net_device_ops tsnep_netdev_ops = { .ndo_open = tsnep_netdev_open, .ndo_stop = tsnep_netdev_close, .ndo_start_xmit = tsnep_netdev_xmit_frame, - .ndo_eth_ioctl = tsnep_netdev_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = tsnep_netdev_set_multicast, .ndo_get_stats64 = tsnep_netdev_get_stats64, .ndo_set_mac_address = tsnep_netdev_set_mac_address, @@ -2394,6 +2384,8 @@ static const struct net_device_ops tsnep_netdev_ops = { .ndo_bpf = tsnep_netdev_bpf, .ndo_xdp_xmit = tsnep_netdev_xdp_xmit, .ndo_xsk_wakeup = tsnep_netdev_xsk_wakeup, + .ndo_hwtstamp_get = tsnep_ptp_hwtstamp_get, + .ndo_hwtstamp_set = tsnep_ptp_hwtstamp_set, }; static int tsnep_mac_init(struct tsnep_adapter *adapter) diff --git a/drivers/net/ethernet/engleder/tsnep_ptp.c b/drivers/net/ethernet/engleder/tsnep_ptp.c index 54fbf0126815f..ae1308eb813de 100644 --- 
a/drivers/net/ethernet/engleder/tsnep_ptp.c +++ b/drivers/net/ethernet/engleder/tsnep_ptp.c @@ -19,57 +19,53 @@ void tsnep_get_system_time(struct tsnep_adapter *adapter, u64 *time) *time = (((u64)high) << 32) | ((u64)low); } -int tsnep_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +int tsnep_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { struct tsnep_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config config; - - if (!ifr) - return -EINVAL; - - if (cmd == SIOCSHWTSTAMP) { - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - return -ERANGE; - } - - memcpy(&adapter->hwtstamp_config, &config, - sizeof(adapter->hwtstamp_config)); + + *config = adapter->hwtstamp_config; + return 0; +} + +int tsnep_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct tsnep_adapter *adapter = netdev_priv(netdev); + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; } - if (copy_to_user(ifr->ifr_data, &adapter->hwtstamp_config, - sizeof(adapter->hwtstamp_config))) - return 
-EFAULT; + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + config->rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + adapter->hwtstamp_config = *config; return 0; } From dc34040654e455564e676006faa89ff1ab2adfd7 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 16 Oct 2025 15:25:15 +0000 Subject: [PATCH 150/867] funeth: convert to ndo_hwtstamp API Convert driver to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. .ndo_eth_ioctl() implementation becomes empty, remove it. 
Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251016152515.3510991-8-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/fungible/funeth/funeth.h | 4 +- .../ethernet/fungible/funeth/funeth_main.c | 40 +++++++------------ 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h index 1250e10d21db2..55e705e239f82 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth.h +++ b/drivers/net/ethernet/fungible/funeth/funeth.h @@ -4,7 +4,7 @@ #define _FUNETH_H #include -#include +#include #include #include #include @@ -121,7 +121,7 @@ struct funeth_priv { u8 rx_coal_usec; u8 rx_coal_count; - struct hwtstamp_config hwtstamp_cfg; + struct kernel_hwtstamp_config hwtstamp_cfg; /* cumulative queue stats from earlier queue instances */ u64 tx_packets; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c index ac86179a0a817..792cddac6f1b9 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_main.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -1014,26 +1014,25 @@ static int fun_get_port_attributes(struct net_device *netdev) return 0; } -static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +static int fun_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { const struct funeth_priv *fp = netdev_priv(dev); - return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg, - sizeof(fp->hwtstamp_cfg)) ? 
-EFAULT : 0; + *config = fp->hwtstamp_cfg; + return 0; } -static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +static int fun_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct funeth_priv *fp = netdev_priv(dev); - struct hwtstamp_config cfg; - - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; /* no TX HW timestamps */ - cfg.tx_type = HWTSTAMP_TX_OFF; + config->tx_type = HWTSTAMP_TX_OFF; - switch (cfg.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case HWTSTAMP_FILTER_ALL: @@ -1051,26 +1050,14 @@ static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - cfg.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } - fp->hwtstamp_cfg = cfg; - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; -} - -static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCSHWTSTAMP: - return fun_hwtstamp_set(dev, ifr); - case SIOCGHWTSTAMP: - return fun_hwtstamp_get(dev, ifr); - default: - return -EOPNOTSUPP; - } + fp->hwtstamp_cfg = *config; + return 0; } /* Prepare the queues for XDP. 
*/ @@ -1340,7 +1327,6 @@ static const struct net_device_ops fun_netdev_ops = { .ndo_change_mtu = fun_change_mtu, .ndo_set_mac_address = fun_set_macaddr, .ndo_validate_addr = eth_validate_addr, - .ndo_eth_ioctl = fun_ioctl, .ndo_uninit = fun_uninit, .ndo_bpf = fun_xdp, .ndo_xdp_xmit = fun_xdp_xmit_frames, @@ -1348,6 +1334,8 @@ static const struct net_device_ops fun_netdev_ops = { .ndo_set_vf_vlan = fun_set_vf_vlan, .ndo_set_vf_rate = fun_set_vf_rate, .ndo_get_vf_config = fun_get_vf_config, + .ndo_hwtstamp_get = fun_hwtstamp_get, + .ndo_hwtstamp_set = fun_hwtstamp_set, }; #define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \ From a5cd3a60aa1d0366265638c43012e1ac1b3f6d6a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Oct 2025 18:29:11 +0000 Subject: [PATCH 151/867] net: shrink napi_skb_cache_{put,get}() and napi_skb_cache_get_bulk() Following loop in napi_skb_cache_put() is unrolled by the compiler even if CONFIG_KASAN is not enabled: for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_mempool_unpoison_object(nc->skb_cache[i], kmem_cache_size(net_hotdata.skbuff_cache)); We have 32 times this sequence, for a total of 384 bytes. 48 8b 3d 00 00 00 00 net_hotdata.skbuff_cache,%rdi e8 00 00 00 00 call kmem_cache_size This is because kmem_cache_size() is not an inline and not const, and kasan_unpoison_object_data() is an inline function. Cache kmem_cache_size() result in a variable, so that the compiler can remove dead code (and variable) when/if CONFIG_KASAN is unset. After this patch, napi_skb_cache_put() is inlined in its callers, and we avoid one kmem_cache_size() call in napi_skb_cache_get() and napi_skb_cache_get_bulk(). 
Signed-off-by: Eric Dumazet Reviewed-by: Jacob Keller Reviewed-by: Kuniyuki Iwashima Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251016182911.1132792-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8eb3c58207243..5b4bc8b1c7d56 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -274,6 +274,11 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) } EXPORT_SYMBOL(__netdev_alloc_frag_align); +/* Cache kmem_cache_size(net_hotdata.skbuff_cache) to help the compiler + * remove dead code (and skbuff_cache_size) when CONFIG_KASAN is unset. + */ +static u32 skbuff_cache_size __read_mostly; + static struct sk_buff *napi_skb_cache_get(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -293,7 +298,7 @@ static struct sk_buff *napi_skb_cache_get(void) skb = nc->skb_cache[--nc->skb_count]; local_unlock_nested_bh(&napi_alloc_cache.bh_lock); - kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); + kasan_mempool_unpoison_object(skb, skbuff_cache_size); return skb; } @@ -345,11 +350,9 @@ u32 napi_skb_cache_get_bulk(void **skbs, u32 n) get: for (u32 base = nc->skb_count - n, i = 0; i < n; i++) { - u32 cache_size = kmem_cache_size(net_hotdata.skbuff_cache); - skbs[i] = nc->skb_cache[base + i]; - kasan_mempool_unpoison_object(skbs[i], cache_size); + kasan_mempool_unpoison_object(skbs[i], skbuff_cache_size); memset(skbs[i], 0, offsetof(struct sk_buff, tail)); } @@ -1437,7 +1440,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_mempool_unpoison_object(nc->skb_cache[i], - kmem_cache_size(net_hotdata.skbuff_cache)); + skbuff_cache_size); kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF, 
nc->skb_cache + NAPI_SKB_CACHE_HALF); @@ -5125,6 +5128,8 @@ void __init skb_init(void) offsetof(struct sk_buff, cb), sizeof_field(struct sk_buff, cb), NULL); + skbuff_cache_size = kmem_cache_size(net_hotdata.skbuff_cache); + net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), 0, From c30fd916c4d7760ccf654e52370b0a31be885789 Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Thu, 16 Oct 2025 18:25:42 -0700 Subject: [PATCH 152/867] gve: Consolidate and persist ethtool ring changes Refactor the ethtool ring parameter configuration logic to address two issues: unnecessary queue resets and lost configuration changes when the interface is down. Previously, `gve_set_ringparam` could trigger multiple queue destructions and recreations for a single command, as different settings (e.g., header split, ring sizes) were applied one by one. Furthermore, if the interface was down, any changes made via ethtool were discarded instead of being saved for the next time the interface was brought up. This patch centralizes the configuration logic. Individual functions like `gve_set_hsplit_config` are modified to only validate and stage changes in a temporary config struct. The main `gve_set_ringparam` function now gathers all staged changes and applies them as a single, combined configuration: 1. If the interface is up, it calls `gve_adjust_config` once. 2. If the interface is down, it saves the settings directly to the driver's private struct, ensuring they persist and are used when the interface is brought back up. 
Signed-off-by: Ankit Garg Reviewed-by: Harshitha Ramamurthy Reviewed-by: Jordan Rhee Reviewed-by: Willem de Bruijn Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251017012614.3631351-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 3 +- drivers/net/ethernet/google/gve/gve_ethtool.c | 86 +++++++++---------- drivers/net/ethernet/google/gve/gve_main.c | 17 ++-- 3 files changed, 51 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 4cc6dcbfd367b..cf95ec25b11a3 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1251,7 +1251,8 @@ void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); bool gve_header_split_supported(const struct gve_priv *priv); -int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); /* rx buffer handling */ int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs); void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index d0a223250845b..b030a84b678ce 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -537,34 +537,6 @@ static void gve_get_ringparam(struct net_device *netdev, kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; } -static int gve_adjust_ring_sizes(struct gve_priv *priv, - u16 new_tx_desc_cnt, - u16 new_rx_desc_cnt) -{ - struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; - struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - int err; - - /* get current queue configuration */ - 
gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - - /* copy over the new ring_size from ethtool */ - tx_alloc_cfg.ring_size = new_tx_desc_cnt; - rx_alloc_cfg.ring_size = new_rx_desc_cnt; - - if (netif_running(priv->dev)) { - err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); - if (err) - return err; - } - - /* Set new ring_size for the next up */ - priv->tx_desc_cnt = new_tx_desc_cnt; - priv->rx_desc_cnt = new_rx_desc_cnt; - - return 0; -} - static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt, u16 new_rx_desc_cnt) { @@ -584,34 +556,62 @@ static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt return 0; } +static int gve_set_ring_sizes_config(struct gve_priv *priv, u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + if (new_tx_desc_cnt == priv->tx_desc_cnt && + new_rx_desc_cnt == priv->rx_desc_cnt) + return 0; + + if (!priv->modify_ring_size_enabled) { + dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n"); + return -EOPNOTSUPP; + } + + if (gve_validate_req_ring_size(priv, new_tx_desc_cnt, new_rx_desc_cnt)) + return -EINVAL; + + tx_alloc_cfg->ring_size = new_tx_desc_cnt; + rx_alloc_cfg->ring_size = new_rx_desc_cnt; + return 0; +} + static int gve_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *cmd, struct kernel_ethtool_ringparam *kernel_cmd, struct netlink_ext_ack *extack) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); - u16 new_tx_cnt, new_rx_cnt; int err; - err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split, + &rx_alloc_cfg); if (err) return err; - if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt) 
- return 0; + err = gve_set_ring_sizes_config(priv, cmd->tx_pending, cmd->rx_pending, + &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; - if (!priv->modify_ring_size_enabled) { - dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n"); - return -EOPNOTSUPP; + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + } else { + /* Set ring params for the next up */ + priv->header_split_enabled = rx_alloc_cfg.enable_header_split; + priv->rx_cfg.packet_buffer_size = + rx_alloc_cfg.packet_buffer_size; + priv->tx_desc_cnt = tx_alloc_cfg.ring_size; + priv->rx_desc_cnt = rx_alloc_cfg.ring_size; } - - new_tx_cnt = cmd->tx_pending; - new_rx_cnt = cmd->rx_pending; - - if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt)) - return -EINVAL; - - return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt); + return 0; } static int gve_user_reset(struct net_device *netdev, u32 *flags) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 1be1b1ef31ee3..29845e8f3c0dc 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2058,12 +2058,10 @@ bool gve_header_split_supported(const struct gve_priv *priv) priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; } -int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { - struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; - struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; bool enable_hdr_split; - int err = 0; if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) return 0; @@ -2081,14 +2079,11 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) if (enable_hdr_split == priv->header_split_enabled) return 0; - gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - - 
rx_alloc_cfg.enable_header_split = enable_hdr_split; - rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); + rx_alloc_cfg->enable_header_split = enable_hdr_split; + rx_alloc_cfg->packet_buffer_size = + gve_get_pkt_buf_size(priv, enable_hdr_split); - if (netif_running(priv->dev)) - err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); - return err; + return 0; } static int gve_set_features(struct net_device *netdev, From 98c2f0b42eead51282fa722f4aedccd2987e0374 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 16 Oct 2025 23:08:37 -0700 Subject: [PATCH 153/867] net: docs: add missing features that can have stats While trying to figure out ethtool -I | --include-statistics, I noticed some docs got missed when implementing commit 0e9c127729be ("ethtool: add interface to read Tx hardware timestamping statistics"). Fix up the docs to match the kernel code, and while there, sort them in alphabetical order. Reviewed-by: Jacob Keller Reviewed-by: Rahul Rameshbabu Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: Jesse Brandeburg Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251016-jk-iwl-next-2025-10-15-v2-8-ff3a390d9fc6@intel.com Signed-off-by: Jakub Kicinski --- Documentation/networking/statistics.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst index 518284e287b04..66b0ef941457c 100644 --- a/Documentation/networking/statistics.rst +++ b/Documentation/networking/statistics.rst @@ -184,9 +184,11 @@ Protocol-related statistics can be requested in get commands by setting the `ETHTOOL_FLAG_STATS` flag in `ETHTOOL_A_HEADER_FLAGS`. 
Currently statistics are supported in the following commands: - - `ETHTOOL_MSG_PAUSE_GET` - `ETHTOOL_MSG_FEC_GET` + - `ETHTOOL_MSG_LINKSTATE_GET` - `ETHTOOL_MSG_MM_GET` + - `ETHTOOL_MSG_PAUSE_GET` + - `ETHTOOL_MSG_TSINFO_GET` debugfs ------- From 20ae87514ad5159f821951761fc7362bceeca265 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 16 Oct 2025 23:08:38 -0700 Subject: [PATCH 154/867] ice: implement ethtool standard stats Add support for MAC/pause/RMON stats. This enables reporting hardware statistics in a common way via: ethtool -S eth0 --all-groups and ethtool --include-statistics --show-pause eth0 While doing so, add support for one new stat, receive length error (RLEC), which is extremely unlikely to happen since most L2 frames have a type/length field specifying a "type", and raw ethernet frames aren't used much any longer. NOTE: I didn't implement Ctrl aka control frame stats because the hardware doesn't seem to implement support. Reviewed-by: Marcin Szycik Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: Jesse Brandeburg Tested-by: Rinitha S Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251016-jk-iwl-next-2025-10-15-v2-9-ff3a390d9fc6@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 78 ++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_main.c | 3 + drivers/net/ethernet/intel/ice/ice_type.h | 1 + 3 files changed, 82 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index dc131779d4267..d1ec7e6f12bf4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4661,6 +4661,81 @@ static void ice_get_fec_stats(struct net_device *netdev, pi->lport, err); } +static void ice_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct ice_pf *pf = 
ice_netdev_to_pf(netdev); + struct ice_hw_port_stats *ps = &pf->stats; + + mac_stats->FramesTransmittedOK = ps->eth.tx_unicast + + ps->eth.tx_multicast + + ps->eth.tx_broadcast; + mac_stats->FramesReceivedOK = ps->eth.rx_unicast + + ps->eth.rx_multicast + + ps->eth.rx_broadcast; + mac_stats->FrameCheckSequenceErrors = ps->crc_errors; + mac_stats->OctetsTransmittedOK = ps->eth.tx_bytes; + mac_stats->OctetsReceivedOK = ps->eth.rx_bytes; + mac_stats->MulticastFramesXmittedOK = ps->eth.tx_multicast; + mac_stats->BroadcastFramesXmittedOK = ps->eth.tx_broadcast; + mac_stats->MulticastFramesReceivedOK = ps->eth.rx_multicast; + mac_stats->BroadcastFramesReceivedOK = ps->eth.rx_broadcast; + mac_stats->InRangeLengthErrors = ps->rx_len_errors; + mac_stats->FrameTooLongErrors = ps->rx_oversize; +} + +static void ice_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_hw_port_stats *ps = &pf->stats; + + pause_stats->tx_pause_frames = ps->link_xon_tx + ps->link_xoff_tx; + pause_stats->rx_pause_frames = ps->link_xon_rx + ps->link_xoff_rx; +} + +static const struct ethtool_rmon_hist_range ice_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1522 }, + { 1523, 9522 }, + {} +}; + +static void ice_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_hw_port_stats *ps = &pf->stats; + + rmon->undersize_pkts = ps->rx_undersize; + rmon->oversize_pkts = ps->rx_oversize; + rmon->fragments = ps->rx_fragments; + rmon->jabbers = ps->rx_jabber; + + rmon->hist[0] = ps->rx_size_64; + rmon->hist[1] = ps->rx_size_127; + rmon->hist[2] = ps->rx_size_255; + rmon->hist[3] = ps->rx_size_511; + rmon->hist[4] = ps->rx_size_1023; + rmon->hist[5] = ps->rx_size_1522; + rmon->hist[6] = ps->rx_size_big; + + rmon->hist_tx[0] 
= ps->tx_size_64; + rmon->hist_tx[1] = ps->tx_size_127; + rmon->hist_tx[2] = ps->tx_size_255; + rmon->hist_tx[3] = ps->tx_size_511; + rmon->hist_tx[4] = ps->tx_size_1023; + rmon->hist_tx[5] = ps->tx_size_1522; + rmon->hist_tx[6] = ps->tx_size_big; + + *ranges = ice_rmon_ranges; +} + #define ICE_ETHTOOL_PFR (ETH_RESET_IRQ | ETH_RESET_DMA | \ ETH_RESET_FILTER | ETH_RESET_OFFLOAD) @@ -4744,6 +4819,9 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, .get_fec_stats = ice_get_fec_stats, + .get_eth_mac_stats = ice_get_eth_mac_stats, + .get_pause_stats = ice_get_pause_stats, + .get_rmon_stats = ice_get_rmon_stats, .get_drvinfo = ice_get_drvinfo, .get_regs_len = ice_get_regs_len, .get_regs = ice_get_regs, diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 86f5859e88ef5..3d5615caf6d16 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7138,6 +7138,9 @@ void ice_update_pf_stats(struct ice_pf *pf) &prev_ps->mac_remote_faults, &cur_ps->mac_remote_faults); + ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, + &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); + ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, &prev_ps->rx_undersize, &cur_ps->rx_undersize); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index b0a1b67071c5e..6a2ec8389a8f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -1063,6 +1063,7 @@ struct ice_hw_port_stats { u64 error_bytes; /* errbc */ u64 mac_local_faults; /* mlfc */ u64 mac_remote_faults; /* mrfc */ + u64 rx_len_errors; /* rlec */ u64 link_xon_rx; /* lxonrxc */ u64 link_xoff_rx; /* lxoffrxc */ u64 link_xon_tx; /* lxontxc */ From 4368d5fe02f6d2cf36bb3b261a7ca25a2dd9a3ca Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: 
Thu, 16 Oct 2025 23:08:39 -0700 Subject: [PATCH 155/867] ice: add tracking of good transmit timestamps As a pre-requisite to implementing timestamp statistics, start tracking successful PTP timestamps. There already existed a trace event, but add a counter as well so it can be displayed by the next patch. Good count is a u64 as it is much more likely to be incremented. The existing error stats are all u32 as before, and are less likely so will wrap less. Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: Jesse Brandeburg Tested-by: Rinitha S Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251016-jk-iwl-next-2025-10-15-v2-10-ff3a390d9fc6@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp.c | 9 +++++++++ drivers/net/ethernet/intel/ice/ice_ptp.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index fb0f6365a6d6f..4f50e952bfb57 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -500,6 +500,9 @@ void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx) if (tstamp) { shhwtstamps.hwtstamp = ns_to_ktime(tstamp); ice_trace(tx_tstamp_complete, skb, idx); + + /* Count the number of Tx timestamps that succeeded */ + pf->ptp.tx_hwtstamp_good++; } skb_tstamp_tx(skb, &shhwtstamps); @@ -558,6 +561,7 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; unsigned long flags; + u32 tstamp_good = 0; struct ice_pf *pf; struct ice_hw *hw; u64 tstamp_ready; @@ -658,11 +662,16 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) if (tstamp) { shhwtstamps.hwtstamp = ns_to_ktime(tstamp); ice_trace(tx_tstamp_complete, skb, idx); + + /* Count the number of Tx timestamps that succeeded */ + tstamp_good++; } skb_tstamp_tx(skb, &shhwtstamps); dev_kfree_skb_any(skb); } + + 
pf->ptp.tx_hwtstamp_good += tstamp_good; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 137f2070a2d99..27016aac4f1e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -237,6 +237,7 @@ struct ice_ptp_pin_desc { * @clock: pointer to registered PTP clock device * @tstamp_config: hardware timestamping configuration * @reset_time: kernel time after clock stop on reset + * @tx_hwtstamp_good: number of completed Tx timestamp requests * @tx_hwtstamp_skipped: number of Tx time stamp requests skipped * @tx_hwtstamp_timeouts: number of Tx skbs discarded with no time stamp * @tx_hwtstamp_flushed: number of Tx skbs flushed due to interface closed @@ -261,6 +262,7 @@ struct ice_ptp { struct ptp_clock *clock; struct kernel_hwtstamp_config tstamp_config; u64 reset_time; + u64 tx_hwtstamp_good; u32 tx_hwtstamp_skipped; u32 tx_hwtstamp_timeouts; u32 tx_hwtstamp_flushed; From 71462475d0020b4001066d5b591ab09ea143002d Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 16 Oct 2025 23:08:40 -0700 Subject: [PATCH 156/867] ice: implement transmit hardware timestamp statistics The kernel now has common statistics for transmit timestamps, so implement them in the ice driver. 
use via ethtool -I -T eth0 Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Reviewed-by: Hariprasad Kelam Reviewed-by: Simon Horman Signed-off-by: Jesse Brandeburg Tested-by: Rinitha S Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251016-jk-iwl-next-2025-10-15-v2-11-ff3a390d9fc6@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index d1ec7e6f12bf4..75492a720c68c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4736,6 +4736,23 @@ static void ice_get_rmon_stats(struct net_device *netdev, *ranges = ice_rmon_ranges; } +/* ice_get_ts_stats - provide timestamping stats + * @netdev: the netdevice pointer from ethtool + * @ts_stats: the ethtool data structure to fill in + */ +static void ice_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_ptp *ptp = &pf->ptp; + + ts_stats->pkts = ptp->tx_hwtstamp_good; + ts_stats->err = ptp->tx_hwtstamp_skipped + + ptp->tx_hwtstamp_flushed + + ptp->tx_hwtstamp_discarded; + ts_stats->lost = ptp->tx_hwtstamp_timeouts; +} + #define ICE_ETHTOOL_PFR (ETH_RESET_IRQ | ETH_RESET_DMA | \ ETH_RESET_FILTER | ETH_RESET_OFFLOAD) @@ -4822,6 +4839,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_eth_mac_stats = ice_get_eth_mac_stats, .get_pause_stats = ice_get_pause_stats, .get_rmon_stats = ice_get_rmon_stats, + .get_ts_stats = ice_get_ts_stats, .get_drvinfo = ice_get_drvinfo, .get_regs_len = ice_get_regs_len, .get_regs = ice_get_regs, From a308ea9721122d632d1ce2af4cec1c008aff512a Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 16 Oct 2025 23:08:41 -0700 Subject: [PATCH 157/867] ice: refactor to use helpers Use the 
ice_netdev_to_pf() helper in more places and remove a bunch of boilerplate code. Not every instance could be replaced due to use of the netdev_priv() output or the vsi variable within a bunch of functions. Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251016-jk-iwl-next-2025-10-15-v2-12-ff3a390d9fc6@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 48 ++++++------------- .../net/ethernet/intel/ice/ice_flex_pipe.c | 8 +--- drivers/net/ethernet/intel/ice/ice_lag.c | 3 +- drivers/net/ethernet/intel/ice/ice_main.c | 10 ++-- drivers/net/ethernet/intel/ice/ice_ptp.c | 6 +-- drivers/net/ethernet/intel/ice/ice_sriov.c | 3 +- 6 files changed, 24 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 75492a720c68c..cb34d4675a788 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -794,8 +794,7 @@ static int ice_get_extended_regs(struct net_device *netdev, void *p) static void ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_hw *hw = &pf->hw; u32 *regs_buf = (u32 *)p; unsigned int i; @@ -810,8 +809,7 @@ ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) static u32 ice_get_msglevel(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); #ifndef CONFIG_DYNAMIC_DEBUG if (pf->hw.debug_mask) @@ -824,8 +822,7 @@ static u32 ice_get_msglevel(struct net_device *netdev) static void ice_set_msglevel(struct net_device *netdev, u32 data) { - 
struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); #ifndef CONFIG_DYNAMIC_DEBUG if (ICE_DBG_USER & data) @@ -840,16 +837,14 @@ static void ice_set_msglevel(struct net_device *netdev, u32 data) static void ice_get_link_ext_stats(struct net_device *netdev, struct ethtool_link_ext_stats *stats) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); stats->link_down_events = pf->link_down_events; } static int ice_get_eeprom_len(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); return (int)pf->hw.flash.flash_size; } @@ -858,9 +853,7 @@ static int ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *bytes) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_hw *hw = &pf->hw; struct device *dev; int ret; @@ -959,8 +952,7 @@ static u64 ice_link_test(struct net_device *netdev) */ static u64 ice_eeprom_test(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); netdev_info(netdev, "EEPROM test\n"); return !!(ice_nvm_validate_checksum(&pf->hw)); @@ -1274,9 +1266,8 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) */ static u64 ice_loopback_test(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *orig_vsi = np->vsi, *test_vsi; - struct ice_pf *pf = orig_vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *test_vsi; u8 *tx_frame __free(kfree) = NULL; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; @@ -1365,8 +1356,7 @@ static u64 
ice_loopback_test(struct net_device *netdev) */ static u64 ice_intr_test(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); u16 swic_old = pf->sw_int_count; netdev_info(netdev, "interrupt test\n"); @@ -1394,9 +1384,8 @@ static void ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { - struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = ice_netdev_to_pf(netdev); bool if_running = netif_running(netdev); - struct ice_pf *pf = np->vsi->back; struct device *dev; dev = ice_pf_to_dev(pf); @@ -1720,9 +1709,7 @@ static int ice_nway_reset(struct net_device *netdev) */ static u32 ice_get_priv_flags(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); u32 i, ret_flags = 0; for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { @@ -4417,9 +4404,7 @@ static int ice_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_hw *hw = &pf->hw; u8 sff8472_comp = 0; u8 sff8472_swap = 0; @@ -4491,12 +4476,10 @@ static int ice_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { - struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = ice_netdev_to_pf(netdev); #define SFF_READ_BLOCK_SIZE 8 u8 value[SFF_READ_BLOCK_SIZE] = { 0 }; u8 addr = ICE_I2C_EEPROM_DEV_ADDR; - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; bool is_sfp = false; unsigned int i, j; @@ -4774,8 +4757,7 @@ static void ice_get_ts_stats(struct net_device *netdev, */ static int ice_ethtool_reset(struct net_device *dev, u32 *flags) { - struct 
ice_netdev_priv *np = netdev_priv(dev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); enum ice_reset_req reset; switch (*flags) { diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 363ae79a3620c..06b8786ae3abd 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -574,9 +574,7 @@ ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type, int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); enum ice_tunnel_type tnl_type; int status; u16 index; @@ -598,9 +596,7 @@ int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); enum ice_tunnel_type tnl_type; int status; diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index aebf8e08a297b..d2576d606e10e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2177,8 +2177,7 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr) */ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) { - struct ice_netdev_priv *np = netdev_priv(lag->netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(lag->netdev); ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c 
b/drivers/net/ethernet/intel/ice/ice_main.c index 3d5615caf6d16..ca95b8800bb3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8074,9 +8074,7 @@ static int ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); u16 bmode; bmode = pf->first_sw->bridge_mode; @@ -8146,8 +8144,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 __always_unused flags, struct netlink_ext_ack __always_unused *extack) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); struct nlattr *attr, *br_spec; struct ice_hw *hw = &pf->hw; struct ice_sw *pf_sw; @@ -9581,8 +9578,7 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, */ int ice_open(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); if (ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't open net device while reset is in progress"); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 4f50e952bfb57..985b3e79b3126 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2215,8 +2215,7 @@ static int ice_ptp_getcrosststamp(struct ptp_clock_info *info, int ice_ptp_hwtstamp_get(struct net_device *netdev, struct kernel_hwtstamp_config *config) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); if (pf->ptp.state != ICE_PTP_READY) return -EIO; @@ -2287,8 +2286,7 @@ int ice_ptp_hwtstamp_set(struct net_device *netdev, struct kernel_hwtstamp_config 
*config, struct netlink_ext_ack *extack) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); int err; if (pf->ptp.state != ICE_PTP_READY) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 843e82fd3bf93..6b1126ddb5616 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1190,8 +1190,7 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) */ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vsi *vf_vsi; struct device *dev; struct ice_vf *vf; From 3c7bf5af2196087f394f9099b53e37569636b259 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Thu, 16 Oct 2025 23:08:43 -0700 Subject: [PATCH 158/867] e1000e: Introduce private flag to disable K1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The K1 state reduces power consumption on ICH family network controllers during idle periods, similarly to L1 state on PCI Express NICs. Therefore, it is recommended and enabled by default. However, on some systems it has been observed to have adverse side effects, such as packet loss. It has been established through debug that the problem may be due to firmware misconfiguration of specific systems, interoperability with certain link partners, or marginal electrical conditions of specific units. These problems typically cannot be fixed in the field, and generic workarounds to resolve the side effects on all systems, while keeping K1 enabled, were found infeasible. Therefore, add the option for users to globally disable K1 idle state on the adapter. 
Additionally, disable K1 by default for MTL and later platforms, due to issues reported with the current configuration. Link: https://lore.kernel.org/intel-wired-lan/CAMqyJG3LVqfgqMcTxeaPur_Jq0oQH7GgdxRuVtRX_6TTH2mX5Q@mail.gmail.com/ Link: https://lore.kernel.org/intel-wired-lan/20250626153544.1853d106@onyx.my.domain/ Link: https://lore.kernel.org/intel-wired-lan/Z_z9EjcKtwHCQcZR@mail-itl/ Link: https://github.com/QubesOS/qubes-issues/issues/9896 Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2115393 Signed-off-by: Vitaly Lifshits Reviewed-by: Timo Teräs Tested-by: Timo Teräs Reviewed-by: Aleksandr Loktionov Reviewed-by: Dima Ruinskiy Tested-by: Avraham Koren Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251016-jk-iwl-next-2025-10-15-v2-14-ff3a390d9fc6@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/e1000e/e1000.h | 1 + drivers/net/ethernet/intel/e1000e/ethtool.c | 45 ++++++++++++++++++--- drivers/net/ethernet/intel/e1000e/ich8lan.c | 41 ++++++++++--------- drivers/net/ethernet/intel/e1000e/netdev.c | 3 ++ 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 018e61aea787d..aa08f397988e6 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -461,6 +461,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) #define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) #define FLAG2_ENABLE_S0IX_FLOWS BIT(15) +#define FLAG2_DISABLE_K1 BIT(16) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 8e40bb50a01e1..cee57a2149abb 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -26,6 +26,8 @@ struct e1000_stats { static const 
char e1000e_priv_flags_strings[][ETH_GSTRING_LEN] = { #define E1000E_PRIV_FLAGS_S0IX_ENABLED BIT(0) "s0ix-enabled", +#define E1000E_PRIV_FLAGS_DISABLE_K1 BIT(1) + "disable-k1", }; #define E1000E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(e1000e_priv_flags_strings) @@ -2301,26 +2303,59 @@ static u32 e1000e_get_priv_flags(struct net_device *netdev) if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) priv_flags |= E1000E_PRIV_FLAGS_S0IX_ENABLED; + if (adapter->flags2 & FLAG2_DISABLE_K1) + priv_flags |= E1000E_PRIV_FLAGS_DISABLE_K1; + return priv_flags; } static int e1000e_set_priv_flags(struct net_device *netdev, u32 priv_flags) { struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; unsigned int flags2 = adapter->flags2; + unsigned int changed; - flags2 &= ~FLAG2_ENABLE_S0IX_FLOWS; - if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) { - struct e1000_hw *hw = &adapter->hw; + flags2 &= ~(FLAG2_ENABLE_S0IX_FLOWS | FLAG2_DISABLE_K1); - if (hw->mac.type < e1000_pch_cnp) + if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) { + if (hw->mac.type < e1000_pch_cnp) { + e_err("S0ix is not supported on this device\n"); return -EINVAL; + } + flags2 |= FLAG2_ENABLE_S0IX_FLOWS; } - if (flags2 != adapter->flags2) + if (priv_flags & E1000E_PRIV_FLAGS_DISABLE_K1) { + if (hw->mac.type < e1000_ich8lan) { + e_err("Disabling K1 is not supported on this device\n"); + return -EINVAL; + } + + flags2 |= FLAG2_DISABLE_K1; + } + + changed = adapter->flags2 ^ flags2; + if (changed) adapter->flags2 = flags2; + if (changed & FLAG2_DISABLE_K1) { + /* reset the hardware to apply the changes */ + while (test_and_set_bit(__E1000_RESETTING, + &adapter->state)) + usleep_range(1000, 2000); + + if (netif_running(adapter->netdev)) { + e1000e_down(adapter, true); + e1000e_up(adapter); + } else { + e1000e_reset(adapter); + } + + clear_bit(__E1000_RESETTING, &adapter->state); + } + return 0; } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c 
index df4e7d781cb1c..0ff8688ac3b84 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -286,21 +286,26 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) } /** - * e1000_reconfigure_k1_exit_timeout - reconfigure K1 exit timeout to - * align to MTP and later platform requirements. + * e1000_reconfigure_k1_params - reconfigure Kumeran K1 parameters. * @hw: pointer to the HW structure * + * By default K1 is enabled after MAC reset, so this function only + * disables it. + * * Context: PHY semaphore must be held by caller. * Return: 0 on success, negative on failure */ -static s32 e1000_reconfigure_k1_exit_timeout(struct e1000_hw *hw) +static s32 e1000_reconfigure_k1_params(struct e1000_hw *hw) { u16 phy_timeout; u32 fextnvm12; s32 ret_val; - if (hw->mac.type < e1000_pch_mtp) + if (hw->mac.type < e1000_pch_mtp) { + if (hw->adapter->flags2 & FLAG2_DISABLE_K1) + return e1000_configure_k1_ich8lan(hw, false); return 0; + } /* Change Kumeran K1 power down state from P0s to P1 */ fextnvm12 = er32(FEXTNVM12); @@ -310,6 +315,8 @@ static s32 e1000_reconfigure_k1_exit_timeout(struct e1000_hw *hw) /* Wait for the interface the settle */ usleep_range(1000, 1100); + if (hw->adapter->flags2 & FLAG2_DISABLE_K1) + return e1000_configure_k1_ich8lan(hw, false); /* Change K1 exit timeout */ ret_val = e1e_rphy_locked(hw, I217_PHY_TIMEOUTS_REG, @@ -373,8 +380,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) /* At this point the PHY might be inaccessible so don't * propagate the failure */ - if (e1000_reconfigure_k1_exit_timeout(hw)) - e_dbg("Failed to reconfigure K1 exit timeout\n"); + if (e1000_reconfigure_k1_params(hw)) + e_dbg("Failed to reconfigure K1 parameters\n"); fallthrough; case e1000_pch_lpt: @@ -473,10 +480,10 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) if (hw->mac.type >= e1000_pch_mtp) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) { - e_err("Failed to 
reconfigure K1 exit timeout\n"); + e_err("Failed to reconfigure K1 parameters\n"); goto out; } - ret_val = e1000_reconfigure_k1_exit_timeout(hw); + ret_val = e1000_reconfigure_k1_params(hw); hw->phy.ops.release(hw); } } @@ -4948,17 +4955,15 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) u16 i; e1000_initialize_hw_bits_ich8lan(hw); - if (hw->mac.type >= e1000_pch_mtp) { - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; - ret_val = e1000_reconfigure_k1_exit_timeout(hw); - hw->phy.ops.release(hw); - if (ret_val) { - e_dbg("Error failed to reconfigure K1 exit timeout\n"); - return ret_val; - } + ret_val = e1000_reconfigure_k1_params(hw); + hw->phy.ops.release(hw); + if (ret_val) { + e_dbg("Error failed to reconfigure K1 parameters\n"); + return ret_val; } /* Initialize identification LED */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 201322dac2330..116f3c92b5bc5 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7675,6 +7675,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* init PTP hardware clock */ e1000e_ptp_init(adapter); + if (hw->mac.type >= e1000_pch_mtp) + adapter->flags2 |= FLAG2_DISABLE_K1; + /* reset the hardware with the new settings */ e1000e_reset(adapter); From 38516e3fa4ca2c8f06a25716a7286b48824b386b Mon Sep 17 00:00:00 2001 From: Aswin Karuvally Date: Fri, 17 Oct 2025 11:49:54 +0200 Subject: [PATCH 159/867] s390/iucv: Convert sprintf/snprintf to scnprintf Convert sprintf/snprintf calls to scnprintf to better align with the kernel development community practices [1]. 
Link: https://lwn.net/Articles/69419 [1] Reviewed-by: Alexandra Winter Signed-off-by: Aswin Karuvally Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251017094954.1402684-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/s390/net/smsgiucv_app.c | 7 ++++--- net/iucv/af_iucv.c | 7 ++++--- net/iucv/iucv.c | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/s390/net/smsgiucv_app.c b/drivers/s390/net/smsgiucv_app.c index 4bd4d6bfc1262..768108c90b325 100644 --- a/drivers/s390/net/smsgiucv_app.c +++ b/drivers/s390/net/smsgiucv_app.c @@ -88,9 +88,10 @@ static struct smsg_app_event *smsg_app_event_alloc(const char *from, ev->envp[3] = NULL; /* setting up environment: sender, prefix name, and message text */ - snprintf(ev->envp[0], ENV_SENDER_LEN, ENV_SENDER_STR "%s", from); - snprintf(ev->envp[1], ENV_PREFIX_LEN, ENV_PREFIX_STR "%s", SMSG_PREFIX); - snprintf(ev->envp[2], ENV_TEXT_LEN(msg), ENV_TEXT_STR "%s", msg); + scnprintf(ev->envp[0], ENV_SENDER_LEN, ENV_SENDER_STR "%s", from); + scnprintf(ev->envp[1], ENV_PREFIX_LEN, ENV_PREFIX_STR "%s", + SMSG_PREFIX); + scnprintf(ev->envp[2], ENV_TEXT_LEN(msg), ENV_TEXT_STR "%s", msg); return ev; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6c717a7ef2928..4ddfc633d30cf 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -553,10 +553,11 @@ static void __iucv_auto_name(struct iucv_sock *iucv) { char name[12]; - sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); + scnprintf(name, sizeof(name), + "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); while (__iucv_get_sock_by_name(name)) { - sprintf(name, "%08x", - atomic_inc_return(&iucv_sk_list.autobind_name)); + scnprintf(name, sizeof(name), "%08x", + atomic_inc_return(&iucv_sk_list.autobind_name)); } memcpy(iucv->src_name, name, 8); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 473a7847d80bb..008be0abe3a57 100644 --- a/net/iucv/iucv.c +++ 
b/net/iucv/iucv.c @@ -95,7 +95,7 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs, if (!dev) goto out_error; va_start(vargs, fmt); - vsnprintf(buf, sizeof(buf), fmt, vargs); + vscnprintf(buf, sizeof(buf), fmt, vargs); rc = dev_set_name(dev, "%s", buf); va_end(vargs); if (rc) From 122d696c1789621a367d700d777c424f8494a5fa Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 17 Oct 2025 13:45:25 +0700 Subject: [PATCH 160/867] net: nfc: Format userspace interface subsection headings Subsection headings of "Userspace interface" is written in normal paragraph, all-capped. Properly format them as reST section headings. Signed-off-by: Bagas Sanjaya Link: https://patch.msgid.link/20251017064525.28836-3-bagasdotme@gmail.com Signed-off-by: Paolo Abeni --- Documentation/networking/nfc.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/nfc.rst b/Documentation/networking/nfc.rst index 9aab3a88c9b29..4017350061433 100644 --- a/Documentation/networking/nfc.rst +++ b/Documentation/networking/nfc.rst @@ -71,7 +71,8 @@ Userspace interface The userspace interface is divided in control operations and low-level data exchange operation. -CONTROL OPERATIONS: +Control operations +------------------ Generic netlink is used to implement the interface to the control operations. The operations are composed by commands and events, all listed below: @@ -100,7 +101,8 @@ relevant information such as the supported NFC protocols. All polling operations requested through one netlink socket are stopped when it's closed. -LOW-LEVEL DATA EXCHANGE: +Low-level data exchange +----------------------- The userspace must use PF_NFC sockets to perform any data communication with targets. 
All NFC sockets use AF_NFC:: From 97aa8ecb573213b79667a40b5fe5345417dfe961 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 17 Oct 2025 13:45:26 +0700 Subject: [PATCH 161/867] net: 6pack: Demote "How to turn on 6pack support" section heading "How to turn on 6pack support" is a subsection of "Building and installing the 6pack driver". Yet, the former is in the same heading level as the latter as sections, making it listed in networking docs toctree. Demote it to subsection. Signed-off-by: Bagas Sanjaya Link: https://patch.msgid.link/20251017064525.28836-4-bagasdotme@gmail.com Signed-off-by: Paolo Abeni --- Documentation/networking/6pack.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/6pack.rst b/Documentation/networking/6pack.rst index bc5bf1f1a98fb..66d5fd4fc8212 100644 --- a/Documentation/networking/6pack.rst +++ b/Documentation/networking/6pack.rst @@ -94,7 +94,7 @@ kernels may lead to a compilation error because the interface to a kernel function has been changed in the 2.1.8x kernels. How to turn on 6pack support: -============================= +----------------------------- - In the linux kernel configuration program, select the code maturity level options menu and turn on the prompting for development drivers. From 51538c0c9d8cd59bac83cce51b77ccc66ca9fdb8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:10 +0200 Subject: [PATCH 162/867] dt-bindings: net: airoha: Add AN7583 support Introduce AN7583 ethernet controller support to Airoha EN7581 device-tree bindings. The main difference between EN7581 and AN7583 is the number of reset lines required by the controller (AN7583 does not require hsi-mac). 
Signed-off-by: Lorenzo Bianconi Reviewed-by: Conor Dooley Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-1-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- .../bindings/net/airoha,en7581-eth.yaml | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml b/Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml index 6d22131ac2f9e..fbe2ddcdd909c 100644 --- a/Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml +++ b/Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml @@ -17,6 +17,7 @@ properties: compatible: enum: - airoha,en7581-eth + - airoha,an7583-eth reg: items: @@ -44,6 +45,7 @@ properties: - description: PDMA irq resets: + minItems: 7 maxItems: 8 reset-names: @@ -54,8 +56,9 @@ properties: - const: xsi-mac - const: hsi0-mac - const: hsi1-mac - - const: hsi-mac + - enum: [ hsi-mac, xfp-mac ] - const: xfp-mac + minItems: 7 memory-region: items: @@ -81,6 +84,36 @@ properties: interface to implement hardware flow offloading programming Packet Processor Engine (PPE) flow table. +allOf: + - $ref: ethernet-controller.yaml# + - if: + properties: + compatible: + contains: + enum: + - airoha,en7581-eth + then: + properties: + resets: + minItems: 8 + + reset-names: + minItems: 8 + + - if: + properties: + compatible: + contains: + enum: + - airoha,an7583-eth + then: + properties: + resets: + maxItems: 7 + + reset-names: + maxItems: 7 + patternProperties: "^ethernet@[1-4]$": type: object From 6d5b601d52a27aafff555b480e538507901c672c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:11 +0200 Subject: [PATCH 163/867] net: airoha: ppe: Dynamically allocate foe_check_time array in airoha_ppe struct This is a preliminary patch to properly enable PPE support for AN7583 SoC. 
Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-2-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.h | 2 +- drivers/net/ethernet/airoha/airoha_ppe.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index cd13c1c1224f6..4330b672d99e1 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -554,7 +554,7 @@ struct airoha_ppe { struct rhashtable l2_flows; struct hlist_head *foe_flow; - u16 foe_check_time[PPE_NUM_ENTRIES]; + u16 *foe_check_time; struct airoha_foe_stats *foe_stats; dma_addr_t foe_stats_dma; diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 691361b254075..8d1dceadce0be 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -1440,6 +1440,11 @@ int airoha_ppe_init(struct airoha_eth *eth) return -ENOMEM; } + ppe->foe_check_time = devm_kzalloc(eth->dev, PPE_NUM_ENTRIES, + GFP_KERNEL); + if (!ppe->foe_check_time) + return -ENOMEM; + err = rhashtable_init(&eth->flow_table, &airoha_flow_table_params); if (err) return err; From 15f357cd4581ce6e02e5e97719320600783140ec Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:12 +0200 Subject: [PATCH 164/867] net: airoha: Add airoha_ppe_get_num_stats_entries() and airoha_ppe_get_num_total_stats_entries() Introduce airoha_ppe_get_num_stats_entries and airoha_ppe_get_num_total_stats_entries routines in order to make the code more readable controlling if CONFIG_NET_AIROHA_FLOW_STATS is enabled or disabled. Modify airoha_ppe_foe_get_flow_stats_index routine signature relying on airoha_ppe_get_num_total_stats_entries().
Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-3-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.h | 10 +-- drivers/net/ethernet/airoha/airoha_ppe.c | 101 ++++++++++++++++++----- 2 files changed, 81 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 4330b672d99e1..1f7e34a5f457c 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -50,15 +50,9 @@ #define PPE_NUM 2 #define PPE1_SRAM_NUM_ENTRIES (8 * 1024) -#define PPE_SRAM_NUM_ENTRIES (2 * PPE1_SRAM_NUM_ENTRIES) -#ifdef CONFIG_NET_AIROHA_FLOW_STATS +#define PPE_SRAM_NUM_ENTRIES (PPE_NUM * PPE1_SRAM_NUM_ENTRIES) #define PPE1_STATS_NUM_ENTRIES (4 * 1024) -#else -#define PPE1_STATS_NUM_ENTRIES 0 -#endif /* CONFIG_NET_AIROHA_FLOW_STATS */ -#define PPE_STATS_NUM_ENTRIES (2 * PPE1_STATS_NUM_ENTRIES) -#define PPE1_SRAM_NUM_DATA_ENTRIES (PPE1_SRAM_NUM_ENTRIES - PPE1_STATS_NUM_ENTRIES) -#define PPE_SRAM_NUM_DATA_ENTRIES (2 * PPE1_SRAM_NUM_DATA_ENTRIES) +#define PPE_STATS_NUM_ENTRIES (PPE_NUM * PPE1_STATS_NUM_ENTRIES) #define PPE_DRAM_NUM_ENTRIES (16 * 1024) #define PPE_NUM_ENTRIES (PPE_SRAM_NUM_ENTRIES + PPE_DRAM_NUM_ENTRIES) #define PPE_HASH_MASK (PPE_NUM_ENTRIES - 1) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 8d1dceadce0be..22ecece0e33ef 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -32,6 +32,24 @@ static const struct rhashtable_params airoha_l2_flow_table_params = { .automatic_shrinking = true, }; +static int airoha_ppe_get_num_stats_entries(struct airoha_ppe *ppe) +{ + if (!IS_ENABLED(CONFIG_NET_AIROHA_FLOW_STATS)) + return -EOPNOTSUPP; + + return PPE1_STATS_NUM_ENTRIES; +} + +static int airoha_ppe_get_total_num_stats_entries(struct airoha_ppe *ppe) +{ + int 
num_stats = airoha_ppe_get_num_stats_entries(ppe); + + if (num_stats > 0) + num_stats = num_stats * PPE_NUM; + + return num_stats; +} + static bool airoha_ppe2_is_enabled(struct airoha_eth *eth) { return airoha_fe_rr(eth, REG_PPE_GLO_CFG(1)) & PPE_GLO_CFG_EN_MASK; @@ -48,7 +66,7 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) { u32 sram_tb_size, sram_num_entries, dram_num_entries; struct airoha_eth *eth = ppe->eth; - int i; + int i, sram_num_stats_entries; sram_tb_size = PPE_SRAM_NUM_ENTRIES * sizeof(struct airoha_foe_entry); dram_num_entries = PPE_RAM_NUM_ENTRIES_SHIFT(PPE_DRAM_NUM_ENTRIES); @@ -103,8 +121,13 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) } if (airoha_ppe2_is_enabled(eth)) { - sram_num_entries = - PPE_RAM_NUM_ENTRIES_SHIFT(PPE1_SRAM_NUM_DATA_ENTRIES); + sram_num_entries = PPE1_SRAM_NUM_ENTRIES; + sram_num_stats_entries = + airoha_ppe_get_num_stats_entries(ppe); + if (sram_num_stats_entries > 0) + sram_num_entries -= sram_num_stats_entries; + sram_num_entries = PPE_RAM_NUM_ENTRIES_SHIFT(sram_num_entries); + airoha_fe_rmw(eth, REG_PPE_TB_CFG(0), PPE_SRAM_TB_NUM_ENTRY_MASK | PPE_DRAM_TB_NUM_ENTRY_MASK, @@ -120,8 +143,13 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) FIELD_PREP(PPE_DRAM_TB_NUM_ENTRY_MASK, dram_num_entries)); } else { - sram_num_entries = - PPE_RAM_NUM_ENTRIES_SHIFT(PPE_SRAM_NUM_DATA_ENTRIES); + sram_num_entries = PPE_SRAM_NUM_ENTRIES; + sram_num_stats_entries = + airoha_ppe_get_total_num_stats_entries(ppe); + if (sram_num_stats_entries > 0) + sram_num_entries -= sram_num_stats_entries; + sram_num_entries = PPE_RAM_NUM_ENTRIES_SHIFT(sram_num_entries); + airoha_fe_rmw(eth, REG_PPE_TB_CFG(0), PPE_SRAM_TB_NUM_ENTRY_MASK | PPE_DRAM_TB_NUM_ENTRY_MASK, @@ -480,13 +508,21 @@ static u32 airoha_ppe_foe_get_entry_hash(struct airoha_foe_entry *hwe) return hash; } -static u32 airoha_ppe_foe_get_flow_stats_index(struct airoha_ppe *ppe, u32 hash) +static int airoha_ppe_foe_get_flow_stats_index(struct airoha_ppe *ppe, 
+ u32 hash, u32 *index) { - if (!airoha_ppe2_is_enabled(ppe->eth)) - return hash; + int ppe_num_stats_entries; + + ppe_num_stats_entries = airoha_ppe_get_total_num_stats_entries(ppe); + if (ppe_num_stats_entries < 0) + return ppe_num_stats_entries; - return hash >= PPE_STATS_NUM_ENTRIES ? hash - PPE1_STATS_NUM_ENTRIES - : hash; + *index = hash; + if (airoha_ppe2_is_enabled(ppe->eth) && + hash >= ppe_num_stats_entries) + *index = *index - PPE_STATS_NUM_ENTRIES; + + return 0; } static void airoha_ppe_foe_flow_stat_entry_reset(struct airoha_ppe *ppe, @@ -500,9 +536,13 @@ static void airoha_ppe_foe_flow_stat_entry_reset(struct airoha_ppe *ppe, static void airoha_ppe_foe_flow_stats_reset(struct airoha_ppe *ppe, struct airoha_npu *npu) { - int i; + int i, ppe_num_stats_entries; + + ppe_num_stats_entries = airoha_ppe_get_total_num_stats_entries(ppe); + if (ppe_num_stats_entries < 0) + return; - for (i = 0; i < PPE_STATS_NUM_ENTRIES; i++) + for (i = 0; i < ppe_num_stats_entries; i++) airoha_ppe_foe_flow_stat_entry_reset(ppe, npu, i); } @@ -513,10 +553,17 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, { int type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, hwe->ib1); u32 index, pse_port, val, *data, *ib2, *meter; + int ppe_num_stats_entries; u8 nbq; - index = airoha_ppe_foe_get_flow_stats_index(ppe, hash); - if (index >= PPE_STATS_NUM_ENTRIES) + ppe_num_stats_entries = airoha_ppe_get_total_num_stats_entries(ppe); + if (ppe_num_stats_entries < 0) + return; + + if (airoha_ppe_foe_get_flow_stats_index(ppe, hash, &index)) + return; + + if (index >= ppe_num_stats_entries) return; if (type == PPE_PKT_TYPE_BRIDGE) { @@ -1158,11 +1205,19 @@ static int airoha_ppe_flow_offload_destroy(struct airoha_eth *eth, void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash, struct airoha_foe_stats64 *stats) { - u32 index = airoha_ppe_foe_get_flow_stats_index(ppe, hash); struct airoha_eth *eth = ppe->eth; + int ppe_num_stats_entries; struct airoha_npu *npu; 
+ u32 index; + + ppe_num_stats_entries = airoha_ppe_get_total_num_stats_entries(ppe); + if (ppe_num_stats_entries < 0) + return; - if (index >= PPE_STATS_NUM_ENTRIES) + if (airoha_ppe_foe_get_flow_stats_index(ppe, hash, &index)) + return; + + if (index >= ppe_num_stats_entries) return; rcu_read_lock(); @@ -1257,7 +1312,7 @@ static int airoha_ppe_offload_setup(struct airoha_eth *eth) { struct airoha_npu *npu = airoha_ppe_npu_get(eth); struct airoha_ppe *ppe = eth->ppe; - int err; + int err, ppe_num_stats_entries; if (IS_ERR(npu)) return PTR_ERR(npu); @@ -1266,9 +1321,10 @@ static int airoha_ppe_offload_setup(struct airoha_eth *eth) if (err) goto error_npu_put; - if (PPE_STATS_NUM_ENTRIES) { + ppe_num_stats_entries = airoha_ppe_get_total_num_stats_entries(ppe); + if (ppe_num_stats_entries > 0) { err = npu->ops.ppe_init_stats(npu, ppe->foe_stats_dma, - PPE_STATS_NUM_ENTRIES); + ppe_num_stats_entries); if (err) goto error_npu_put; } @@ -1405,8 +1461,8 @@ EXPORT_SYMBOL_GPL(airoha_ppe_put_dev); int airoha_ppe_init(struct airoha_eth *eth) { + int foe_size, err, ppe_num_stats_entries; struct airoha_ppe *ppe; - int foe_size, err; ppe = devm_kzalloc(eth->dev, sizeof(*ppe), GFP_KERNEL); if (!ppe) @@ -1431,8 +1487,9 @@ int airoha_ppe_init(struct airoha_eth *eth) if (!ppe->foe_flow) return -ENOMEM; - foe_size = PPE_STATS_NUM_ENTRIES * sizeof(*ppe->foe_stats); - if (foe_size) { + ppe_num_stats_entries = airoha_ppe_get_total_num_stats_entries(ppe); + if (ppe_num_stats_entries > 0) { + foe_size = ppe_num_stats_entries * sizeof(*ppe->foe_stats); ppe->foe_stats = dmam_alloc_coherent(eth->dev, foe_size, &ppe->foe_stats_dma, GFP_KERNEL); From 5863b4e065e2253ef05684f728a04e4972046bcb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:13 +0200 Subject: [PATCH 165/867] net: airoha: Add airoha_eth_soc_data struct Introduce airoha_eth_soc_data struct to contain differences between various SoC. Move XSI reset names in airoha_eth_soc_data. 
This is a preliminary patch to enable AN7583 ethernet controller support in airoha-eth driver. Co-developed-by: Christian Marangi Signed-off-by: Christian Marangi Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-4-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 42 +++++++++++++++++++----- drivers/net/ethernet/airoha/airoha_eth.h | 17 ++++++++-- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 5825f6f29a92e..c9cebe6752eb5 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1387,8 +1387,7 @@ static int airoha_hw_init(struct platform_device *pdev, int err, i; /* disable xsi */ - err = reset_control_bulk_assert(ARRAY_SIZE(eth->xsi_rsts), - eth->xsi_rsts); + err = reset_control_bulk_assert(eth->soc->num_xsi_rsts, eth->xsi_rsts); if (err) return err; @@ -2922,6 +2921,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, static int airoha_probe(struct platform_device *pdev) { + struct reset_control_bulk_data *xsi_rsts; struct device_node *np; struct airoha_eth *eth; int i, err; @@ -2930,6 +2930,10 @@ static int airoha_probe(struct platform_device *pdev) if (!eth) return -ENOMEM; + eth->soc = of_device_get_match_data(&pdev->dev); + if (!eth->soc) + return -EINVAL; + eth->dev = &pdev->dev; err = dma_set_mask_and_coherent(eth->dev, DMA_BIT_MASK(32)); @@ -2954,13 +2958,18 @@ static int airoha_probe(struct platform_device *pdev) return err; } - eth->xsi_rsts[0].id = "xsi-mac"; - eth->xsi_rsts[1].id = "hsi0-mac"; - eth->xsi_rsts[2].id = "hsi1-mac"; - eth->xsi_rsts[3].id = "hsi-mac"; - eth->xsi_rsts[4].id = "xfp-mac"; + xsi_rsts = devm_kzalloc(eth->dev, + eth->soc->num_xsi_rsts * sizeof(*xsi_rsts), + GFP_KERNEL); + if (err) + return err; + + eth->xsi_rsts = xsi_rsts; + for (i = 0; i < 
eth->soc->num_xsi_rsts; i++) + eth->xsi_rsts[i].id = eth->soc->xsi_rsts_names[i]; + err = devm_reset_control_bulk_get_exclusive(eth->dev, - ARRAY_SIZE(eth->xsi_rsts), + eth->soc->num_xsi_rsts, eth->xsi_rsts); if (err) { dev_err(eth->dev, "failed to get bulk xsi reset lines\n"); @@ -3048,8 +3057,23 @@ static void airoha_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); } +static const char * const en7581_xsi_rsts_names[] = { + "xsi-mac", + "hsi0-mac", + "hsi1-mac", + "hsi-mac", + "xfp-mac", +}; + +static const struct airoha_eth_soc_data en7581_soc_data = { + .version = 0x7581, + .xsi_rsts_names = en7581_xsi_rsts_names, + .num_xsi_rsts = ARRAY_SIZE(en7581_xsi_rsts_names), + .num_ppe = 2, +}; + static const struct of_device_id of_airoha_match[] = { - { .compatible = "airoha,en7581-eth" }, + { .compatible = "airoha,en7581-eth", .data = &en7581_soc_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, of_airoha_match); diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 1f7e34a5f457c..cb7e198e40eeb 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -21,7 +21,6 @@ #define AIROHA_MAX_NUM_IRQ_BANKS 4 #define AIROHA_MAX_DSA_PORTS 7 #define AIROHA_MAX_NUM_RSTS 3 -#define AIROHA_MAX_NUM_XSI_RSTS 5 #define AIROHA_MAX_MTU 9216 #define AIROHA_MAX_PACKET_SIZE 2048 #define AIROHA_NUM_QOS_CHANNELS 4 @@ -556,9 +555,18 @@ struct airoha_ppe { struct dentry *debugfs_dir; }; +struct airoha_eth_soc_data { + u16 version; + const char * const *xsi_rsts_names; + int num_xsi_rsts; + int num_ppe; +}; + struct airoha_eth { struct device *dev; + const struct airoha_eth_soc_data *soc; + unsigned long state; void __iomem *fe_regs; @@ -568,7 +576,7 @@ struct airoha_eth { struct rhashtable flow_table; struct reset_control_bulk_data rsts[AIROHA_MAX_NUM_RSTS]; - struct reset_control_bulk_data xsi_rsts[AIROHA_MAX_NUM_XSI_RSTS]; + struct reset_control_bulk_data *xsi_rsts; struct 
net_device *napi_dev; @@ -611,6 +619,11 @@ static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) return port->id == 1; } +static inline bool airoha_is_7581(struct airoha_eth *eth) +{ + return eth->soc->version == 0x7581; +} + bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); From ef9449f080b61920cdc3d3106f8ffc2d9ba8b861 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:14 +0200 Subject: [PATCH 166/867] net: airoha: Generalize airoha_ppe2_is_enabled routine Rename airoha_ppe2_is_enabled() in airoha_ppe_is_enabled() and generalize it in order to check if each PPE module is enabled. Rely on airoha_ppe_is_enabled routine to properly initialize PPE for AN7583 SoC since AN7583 does not support PPE2. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-5-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 32 ++++++++++++++++-------- drivers/net/ethernet/airoha/airoha_eth.h | 1 + drivers/net/ethernet/airoha/airoha_ppe.c | 17 +++++++------ 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c9cebe6752eb5..dea856ddf242d 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -297,8 +297,11 @@ static void airoha_fe_pse_ports_init(struct airoha_eth *eth) int q; all_rsv = airoha_fe_get_pse_all_rsv(eth); - /* hw misses PPE2 oq rsv */ - all_rsv += PSE_RSV_PAGES * pse_port_num_queues[FE_PSE_PORT_PPE2]; + if (airoha_ppe_is_enabled(eth, 1)) { + /* hw misses PPE2 oq rsv */ + all_rsv += PSE_RSV_PAGES * + pse_port_num_queues[FE_PSE_PORT_PPE2]; + } airoha_fe_set(eth, REG_FE_PSE_BUF_SET, all_rsv); /* CMD1 */ @@ -335,13 +338,17 @@ static void airoha_fe_pse_ports_init(struct airoha_eth *eth) for (q = 4; q < pse_port_num_queues[FE_PSE_PORT_CDM4]; q++) 
airoha_fe_set_pse_oq_rsv(eth, FE_PSE_PORT_CDM4, q, PSE_QUEUE_RSV_PAGES); - /* PPE2 */ - for (q = 0; q < pse_port_num_queues[FE_PSE_PORT_PPE2]; q++) { - if (q < pse_port_num_queues[FE_PSE_PORT_PPE2] / 2) - airoha_fe_set_pse_oq_rsv(eth, FE_PSE_PORT_PPE2, q, - PSE_QUEUE_RSV_PAGES); - else - airoha_fe_set_pse_oq_rsv(eth, FE_PSE_PORT_PPE2, q, 0); + if (airoha_ppe_is_enabled(eth, 1)) { + /* PPE2 */ + for (q = 0; q < pse_port_num_queues[FE_PSE_PORT_PPE2]; q++) { + if (q < pse_port_num_queues[FE_PSE_PORT_PPE2] / 2) + airoha_fe_set_pse_oq_rsv(eth, FE_PSE_PORT_PPE2, + q, + PSE_QUEUE_RSV_PAGES); + else + airoha_fe_set_pse_oq_rsv(eth, FE_PSE_PORT_PPE2, + q, 0); + } } /* GMD4 */ for (q = 0; q < pse_port_num_queues[FE_PSE_PORT_GDM4]; q++) @@ -1762,8 +1769,11 @@ static int airoha_dev_init(struct net_device *dev) airhoha_set_gdm2_loopback(port); fallthrough; case 2: - pse_port = FE_PSE_PORT_PPE2; - break; + if (airoha_ppe_is_enabled(eth, 1)) { + pse_port = FE_PSE_PORT_PPE2; + break; + } + fallthrough; default: pse_port = FE_PSE_PORT_PPE1; break; diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index cb7e198e40eeb..81b1e5f273df2 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -627,6 +627,7 @@ static inline bool airoha_is_7581(struct airoha_eth *eth) bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); +bool airoha_ppe_is_enabled(struct airoha_eth *eth, int index); void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, u16 hash, bool rx_wlan); int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 22ecece0e33ef..505a3005f7db1 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -50,9 +50,12 @@ static int airoha_ppe_get_total_num_stats_entries(struct 
airoha_ppe *ppe) return num_stats; } -static bool airoha_ppe2_is_enabled(struct airoha_eth *eth) +bool airoha_ppe_is_enabled(struct airoha_eth *eth, int index) { - return airoha_fe_rr(eth, REG_PPE_GLO_CFG(1)) & PPE_GLO_CFG_EN_MASK; + if (index >= eth->soc->num_ppe) + return false; + + return airoha_fe_rr(eth, REG_PPE_GLO_CFG(index)) & PPE_GLO_CFG_EN_MASK; } static u32 airoha_ppe_get_timestamp(struct airoha_ppe *ppe) @@ -120,7 +123,7 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) AIROHA_MAX_MTU)); } - if (airoha_ppe2_is_enabled(eth)) { + if (airoha_ppe_is_enabled(eth, 1)) { sram_num_entries = PPE1_SRAM_NUM_ENTRIES; sram_num_stats_entries = airoha_ppe_get_num_stats_entries(ppe); @@ -518,7 +521,7 @@ static int airoha_ppe_foe_get_flow_stats_index(struct airoha_ppe *ppe, return ppe_num_stats_entries; *index = hash; - if (airoha_ppe2_is_enabled(ppe->eth) && + if (airoha_ppe_is_enabled(ppe->eth, 1) && hash >= ppe_num_stats_entries) *index = *index - PPE_STATS_NUM_ENTRIES; @@ -613,7 +616,7 @@ airoha_ppe_foe_get_entry_locked(struct airoha_ppe *ppe, u32 hash) u32 val; int i; - ppe2 = airoha_ppe2_is_enabled(ppe->eth) && + ppe2 = airoha_ppe_is_enabled(ppe->eth, 1) && hash >= PPE1_SRAM_NUM_ENTRIES; airoha_fe_wr(ppe->eth, REG_PPE_RAM_CTRL(ppe2), FIELD_PREP(PPE_SRAM_CTRL_ENTRY_MASK, hash) | @@ -691,7 +694,7 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, if (hash < PPE_SRAM_NUM_ENTRIES) { dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); - bool ppe2 = airoha_ppe2_is_enabled(eth) && + bool ppe2 = airoha_ppe_is_enabled(eth, 1) && hash >= PPE1_SRAM_NUM_ENTRIES; err = npu->ops.ppe_foe_commit_entry(npu, addr, sizeof(*hwe), @@ -1286,7 +1289,7 @@ static int airoha_ppe_flush_sram_entries(struct airoha_ppe *ppe, int i, sram_num_entries = PPE_SRAM_NUM_ENTRIES; struct airoha_foe_entry *hwe = ppe->foe; - if (airoha_ppe2_is_enabled(ppe->eth)) + if (airoha_ppe_is_enabled(ppe->eth, 1)) sram_num_entries = sram_num_entries / 2; for (i = 0; i < sram_num_entries; 
i++) From 5bd1d1fd48ea9f8300b211540d946899c7f96480 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:15 +0200 Subject: [PATCH 167/867] net: airoha: ppe: Move PPE memory info in airoha_eth_soc_data struct AN7583 SoC runs a single PPE device while EN7581 runs two of them. Moreover PPE SRAM in AN7583 SoC is reduced to 8K (while SRAM is 16K on EN7581). Take into account PPE memory layout during PPE configuration. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-6-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.h | 10 +- drivers/net/ethernet/airoha/airoha_ppe.c | 133 +++++++++--------- .../net/ethernet/airoha/airoha_ppe_debugfs.c | 3 +- 3 files changed, 70 insertions(+), 76 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 81b1e5f273df2..df168d798699d 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -47,14 +47,9 @@ #define QDMA_METER_IDX(_n) ((_n) & 0xff) #define QDMA_METER_GROUP(_n) (((_n) >> 8) & 0x3) -#define PPE_NUM 2 -#define PPE1_SRAM_NUM_ENTRIES (8 * 1024) -#define PPE_SRAM_NUM_ENTRIES (PPE_NUM * PPE1_SRAM_NUM_ENTRIES) -#define PPE1_STATS_NUM_ENTRIES (4 * 1024) -#define PPE_STATS_NUM_ENTRIES (PPE_NUM * PPE1_STATS_NUM_ENTRIES) +#define PPE_SRAM_NUM_ENTRIES (8 * 1024) +#define PPE_STATS_NUM_ENTRIES (4 * 1024) #define PPE_DRAM_NUM_ENTRIES (16 * 1024) -#define PPE_NUM_ENTRIES (PPE_SRAM_NUM_ENTRIES + PPE_DRAM_NUM_ENTRIES) -#define PPE_HASH_MASK (PPE_NUM_ENTRIES - 1) #define PPE_ENTRY_SIZE 80 #define PPE_RAM_NUM_ENTRIES_SHIFT(_n) (__ffs((_n) >> 10)) @@ -634,6 +629,7 @@ int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port); +u32 
airoha_ppe_get_total_num_entries(struct airoha_ppe *ppe); struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, u32 hash); void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash, diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 505a3005f7db1..d142660e79104 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -37,19 +37,36 @@ static int airoha_ppe_get_num_stats_entries(struct airoha_ppe *ppe) if (!IS_ENABLED(CONFIG_NET_AIROHA_FLOW_STATS)) return -EOPNOTSUPP; - return PPE1_STATS_NUM_ENTRIES; + return PPE_STATS_NUM_ENTRIES; } static int airoha_ppe_get_total_num_stats_entries(struct airoha_ppe *ppe) { int num_stats = airoha_ppe_get_num_stats_entries(ppe); - if (num_stats > 0) - num_stats = num_stats * PPE_NUM; + if (num_stats > 0) { + struct airoha_eth *eth = ppe->eth; + + num_stats = num_stats * eth->soc->num_ppe; + } return num_stats; } +static u32 airoha_ppe_get_total_sram_num_entries(struct airoha_ppe *ppe) +{ + struct airoha_eth *eth = ppe->eth; + + return PPE_SRAM_NUM_ENTRIES * eth->soc->num_ppe; +} + +u32 airoha_ppe_get_total_num_entries(struct airoha_ppe *ppe) +{ + u32 sram_num_entries = airoha_ppe_get_total_sram_num_entries(ppe); + + return sram_num_entries + PPE_DRAM_NUM_ENTRIES; +} + bool airoha_ppe_is_enabled(struct airoha_eth *eth, int index) { if (index >= eth->soc->num_ppe) @@ -67,14 +84,22 @@ static u32 airoha_ppe_get_timestamp(struct airoha_ppe *ppe) static void airoha_ppe_hw_init(struct airoha_ppe *ppe) { - u32 sram_tb_size, sram_num_entries, dram_num_entries; + u32 sram_ppe_num_data_entries = PPE_SRAM_NUM_ENTRIES, sram_num_entries; + u32 sram_tb_size, dram_num_entries; struct airoha_eth *eth = ppe->eth; int i, sram_num_stats_entries; - sram_tb_size = PPE_SRAM_NUM_ENTRIES * sizeof(struct airoha_foe_entry); + sram_num_entries = airoha_ppe_get_total_sram_num_entries(ppe); + sram_tb_size = sram_num_entries * 
sizeof(struct airoha_foe_entry); dram_num_entries = PPE_RAM_NUM_ENTRIES_SHIFT(PPE_DRAM_NUM_ENTRIES); - for (i = 0; i < PPE_NUM; i++) { + sram_num_stats_entries = airoha_ppe_get_num_stats_entries(ppe); + if (sram_num_stats_entries > 0) + sram_ppe_num_data_entries -= sram_num_stats_entries; + sram_ppe_num_data_entries = + PPE_RAM_NUM_ENTRIES_SHIFT(sram_ppe_num_data_entries); + + for (i = 0; i < eth->soc->num_ppe; i++) { int p; airoha_fe_wr(eth, REG_PPE_TB_BASE(i), @@ -106,10 +131,16 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) airoha_fe_rmw(eth, REG_PPE_TB_CFG(i), PPE_TB_CFG_SEARCH_MISS_MASK | + PPE_SRAM_TB_NUM_ENTRY_MASK | + PPE_DRAM_TB_NUM_ENTRY_MASK | PPE_TB_CFG_KEEPALIVE_MASK | PPE_TB_ENTRY_SIZE_MASK, FIELD_PREP(PPE_TB_CFG_SEARCH_MISS_MASK, 3) | - FIELD_PREP(PPE_TB_ENTRY_SIZE_MASK, 0)); + FIELD_PREP(PPE_TB_ENTRY_SIZE_MASK, 0) | + FIELD_PREP(PPE_SRAM_TB_NUM_ENTRY_MASK, + sram_ppe_num_data_entries) | + FIELD_PREP(PPE_DRAM_TB_NUM_ENTRY_MASK, + dram_num_entries)); airoha_fe_wr(eth, REG_PPE_HASH_SEED(i), PPE_HASH_SEED); @@ -122,45 +153,6 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) FIELD_PREP(FP1_EGRESS_MTU_MASK, AIROHA_MAX_MTU)); } - - if (airoha_ppe_is_enabled(eth, 1)) { - sram_num_entries = PPE1_SRAM_NUM_ENTRIES; - sram_num_stats_entries = - airoha_ppe_get_num_stats_entries(ppe); - if (sram_num_stats_entries > 0) - sram_num_entries -= sram_num_stats_entries; - sram_num_entries = PPE_RAM_NUM_ENTRIES_SHIFT(sram_num_entries); - - airoha_fe_rmw(eth, REG_PPE_TB_CFG(0), - PPE_SRAM_TB_NUM_ENTRY_MASK | - PPE_DRAM_TB_NUM_ENTRY_MASK, - FIELD_PREP(PPE_SRAM_TB_NUM_ENTRY_MASK, - sram_num_entries) | - FIELD_PREP(PPE_DRAM_TB_NUM_ENTRY_MASK, - dram_num_entries)); - airoha_fe_rmw(eth, REG_PPE_TB_CFG(1), - PPE_SRAM_TB_NUM_ENTRY_MASK | - PPE_DRAM_TB_NUM_ENTRY_MASK, - FIELD_PREP(PPE_SRAM_TB_NUM_ENTRY_MASK, - sram_num_entries) | - FIELD_PREP(PPE_DRAM_TB_NUM_ENTRY_MASK, - dram_num_entries)); - } else { - sram_num_entries = PPE_SRAM_NUM_ENTRIES; - 
sram_num_stats_entries = - airoha_ppe_get_total_num_stats_entries(ppe); - if (sram_num_stats_entries > 0) - sram_num_entries -= sram_num_stats_entries; - sram_num_entries = PPE_RAM_NUM_ENTRIES_SHIFT(sram_num_entries); - - airoha_fe_rmw(eth, REG_PPE_TB_CFG(0), - PPE_SRAM_TB_NUM_ENTRY_MASK | - PPE_DRAM_TB_NUM_ENTRY_MASK, - FIELD_PREP(PPE_SRAM_TB_NUM_ENTRY_MASK, - sram_num_entries) | - FIELD_PREP(PPE_DRAM_TB_NUM_ENTRY_MASK, - dram_num_entries)); - } } static void airoha_ppe_flow_mangle_eth(const struct flow_action_entry *act, void *eth) @@ -459,9 +451,11 @@ static int airoha_ppe_foe_entry_set_ipv6_tuple(struct airoha_foe_entry *hwe, return 0; } -static u32 airoha_ppe_foe_get_entry_hash(struct airoha_foe_entry *hwe) +static u32 airoha_ppe_foe_get_entry_hash(struct airoha_ppe *ppe, + struct airoha_foe_entry *hwe) { int type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, hwe->ib1); + u32 ppe_hash_mask = airoha_ppe_get_total_num_entries(ppe) - 1; u32 hash, hv1, hv2, hv3; switch (type) { @@ -499,14 +493,14 @@ static u32 airoha_ppe_foe_get_entry_hash(struct airoha_foe_entry *hwe) case PPE_PKT_TYPE_IPV6_6RD: default: WARN_ON_ONCE(1); - return PPE_HASH_MASK; + return ppe_hash_mask; } hash = (hv1 & hv2) | ((~hv1) & hv3); hash = (hash >> 24) | ((hash & 0xffffff) << 8); hash ^= hv1 ^ hv2 ^ hv3; hash ^= hash >> 16; - hash &= PPE_NUM_ENTRIES - 1; + hash &= ppe_hash_mask; return hash; } @@ -607,9 +601,11 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, static struct airoha_foe_entry * airoha_ppe_foe_get_entry_locked(struct airoha_ppe *ppe, u32 hash) { + u32 sram_num_entries = airoha_ppe_get_total_sram_num_entries(ppe); + lockdep_assert_held(&ppe_lock); - if (hash < PPE_SRAM_NUM_ENTRIES) { + if (hash < sram_num_entries) { u32 *hwe = ppe->foe + hash * sizeof(struct airoha_foe_entry); struct airoha_eth *eth = ppe->eth; bool ppe2; @@ -617,7 +613,7 @@ airoha_ppe_foe_get_entry_locked(struct airoha_ppe *ppe, u32 hash) int i; ppe2 = airoha_ppe_is_enabled(ppe->eth, 1) 
&& - hash >= PPE1_SRAM_NUM_ENTRIES; + hash >= PPE_SRAM_NUM_ENTRIES; airoha_fe_wr(ppe->eth, REG_PPE_RAM_CTRL(ppe2), FIELD_PREP(PPE_SRAM_CTRL_ENTRY_MASK, hash) | PPE_SRAM_CTRL_REQ_MASK); @@ -668,6 +664,7 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, struct airoha_foe_entry *e, u32 hash, bool rx_wlan) { + u32 sram_num_entries = airoha_ppe_get_total_sram_num_entries(ppe); struct airoha_foe_entry *hwe = ppe->foe + hash * sizeof(*hwe); u32 ts = airoha_ppe_get_timestamp(ppe); struct airoha_eth *eth = ppe->eth; @@ -692,10 +689,10 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, if (!rx_wlan) airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); - if (hash < PPE_SRAM_NUM_ENTRIES) { + if (hash < sram_num_entries) { dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); bool ppe2 = airoha_ppe_is_enabled(eth, 1) && - hash >= PPE1_SRAM_NUM_ENTRIES; + hash >= PPE_SRAM_NUM_ENTRIES; err = npu->ops.ppe_foe_commit_entry(npu, addr, sizeof(*hwe), hash, ppe2); @@ -822,7 +819,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, if (state == AIROHA_FOE_STATE_BIND) goto unlock; - index = airoha_ppe_foe_get_entry_hash(hwe); + index = airoha_ppe_foe_get_entry_hash(ppe, hwe); hlist_for_each_entry_safe(e, n, &ppe->foe_flow[index], list) { if (e->type == FLOW_TYPE_L2_SUBFLOW) { state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, hwe->ib1); @@ -882,7 +879,7 @@ static int airoha_ppe_foe_flow_commit_entry(struct airoha_ppe *ppe, if (type == PPE_PKT_TYPE_BRIDGE) return airoha_ppe_foe_l2_flow_commit_entry(ppe, e); - hash = airoha_ppe_foe_get_entry_hash(&e->data); + hash = airoha_ppe_foe_get_entry_hash(ppe, &e->data); e->type = FLOW_TYPE_L4; e->hash = 0xffff; @@ -1286,17 +1283,15 @@ static int airoha_ppe_flow_offload_cmd(struct airoha_eth *eth, static int airoha_ppe_flush_sram_entries(struct airoha_ppe *ppe, struct airoha_npu *npu) { - int i, sram_num_entries = PPE_SRAM_NUM_ENTRIES; + u32 sram_num_entries = airoha_ppe_get_total_sram_num_entries(ppe); 
struct airoha_foe_entry *hwe = ppe->foe; + int i; - if (airoha_ppe_is_enabled(ppe->eth, 1)) - sram_num_entries = sram_num_entries / 2; - - for (i = 0; i < sram_num_entries; i++) + for (i = 0; i < PPE_SRAM_NUM_ENTRIES; i++) memset(&hwe[i], 0, sizeof(*hwe)); return npu->ops.ppe_flush_sram_entries(npu, ppe->foe_dma, - PPE_SRAM_NUM_ENTRIES); + sram_num_entries); } static struct airoha_npu *airoha_ppe_npu_get(struct airoha_eth *eth) @@ -1372,9 +1367,10 @@ void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, u16 hash, bool rx_wlan) { struct airoha_ppe *ppe = dev->priv; + u32 ppe_hash_mask = airoha_ppe_get_total_num_entries(ppe) - 1; u16 now, diff; - if (hash > PPE_HASH_MASK) + if (hash > ppe_hash_mask) return; now = (u16)jiffies; @@ -1465,6 +1461,7 @@ EXPORT_SYMBOL_GPL(airoha_ppe_put_dev); int airoha_ppe_init(struct airoha_eth *eth) { int foe_size, err, ppe_num_stats_entries; + u32 ppe_num_entries; struct airoha_ppe *ppe; ppe = devm_kzalloc(eth->dev, sizeof(*ppe), GFP_KERNEL); @@ -1474,18 +1471,18 @@ int airoha_ppe_init(struct airoha_eth *eth) ppe->dev.ops.setup_tc_block_cb = airoha_ppe_setup_tc_block_cb; ppe->dev.ops.check_skb = airoha_ppe_check_skb; ppe->dev.priv = ppe; + ppe->eth = eth; + eth->ppe = ppe; - foe_size = PPE_NUM_ENTRIES * sizeof(struct airoha_foe_entry); + ppe_num_entries = airoha_ppe_get_total_num_entries(ppe); + foe_size = ppe_num_entries * sizeof(struct airoha_foe_entry); ppe->foe = dmam_alloc_coherent(eth->dev, foe_size, &ppe->foe_dma, GFP_KERNEL); if (!ppe->foe) return -ENOMEM; - ppe->eth = eth; - eth->ppe = ppe; - ppe->foe_flow = devm_kzalloc(eth->dev, - PPE_NUM_ENTRIES * sizeof(*ppe->foe_flow), + ppe_num_entries * sizeof(*ppe->foe_flow), GFP_KERNEL); if (!ppe->foe_flow) return -ENOMEM; @@ -1500,7 +1497,7 @@ int airoha_ppe_init(struct airoha_eth *eth) return -ENOMEM; } - ppe->foe_check_time = devm_kzalloc(eth->dev, PPE_NUM_ENTRIES, + ppe->foe_check_time = devm_kzalloc(eth->dev, ppe_num_entries, GFP_KERNEL); if 
(!ppe->foe_check_time) return -ENOMEM; diff --git a/drivers/net/ethernet/airoha/airoha_ppe_debugfs.c b/drivers/net/ethernet/airoha/airoha_ppe_debugfs.c index 05a756233f6a4..0112c41150bb0 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe_debugfs.c +++ b/drivers/net/ethernet/airoha/airoha_ppe_debugfs.c @@ -53,9 +53,10 @@ static int airoha_ppe_debugfs_foe_show(struct seq_file *m, void *private, [AIROHA_FOE_STATE_FIN] = "FIN", }; struct airoha_ppe *ppe = m->private; + u32 ppe_num_entries = airoha_ppe_get_total_num_entries(ppe); int i; - for (i = 0; i < PPE_NUM_ENTRIES; i++) { + for (i = 0; i < ppe_num_entries; i++) { const char *state_str, *type_str = "UNKNOWN"; void *src_addr = NULL, *dest_addr = NULL; u16 *src_port = NULL, *dest_port = NULL; From 41139125f5c70e0f66f0cc4ac1b3a62f5801ab42 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:16 +0200 Subject: [PATCH 168/867] net: airoha: ppe: Remove airoha_ppe_is_enabled() where not necessary Now each PPE has always PPE_STATS_NUM_ENTRIES entries so we do not need to run airoha_ppe_is_enabled routine to check if the hash refers to PPE1 or PPE2. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-7-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index d142660e79104..195d97e61197e 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -514,10 +514,8 @@ static int airoha_ppe_foe_get_flow_stats_index(struct airoha_ppe *ppe, if (ppe_num_stats_entries < 0) return ppe_num_stats_entries; - *index = hash; - if (airoha_ppe_is_enabled(ppe->eth, 1) && - hash >= ppe_num_stats_entries) - *index = *index - PPE_STATS_NUM_ENTRIES; + *index = hash >= ppe_num_stats_entries ? 
hash - PPE_STATS_NUM_ENTRIES + : hash; return 0; } @@ -607,13 +605,11 @@ airoha_ppe_foe_get_entry_locked(struct airoha_ppe *ppe, u32 hash) if (hash < sram_num_entries) { u32 *hwe = ppe->foe + hash * sizeof(struct airoha_foe_entry); + bool ppe2 = hash >= PPE_SRAM_NUM_ENTRIES; struct airoha_eth *eth = ppe->eth; - bool ppe2; u32 val; int i; - ppe2 = airoha_ppe_is_enabled(ppe->eth, 1) && - hash >= PPE_SRAM_NUM_ENTRIES; airoha_fe_wr(ppe->eth, REG_PPE_RAM_CTRL(ppe2), FIELD_PREP(PPE_SRAM_CTRL_ENTRY_MASK, hash) | PPE_SRAM_CTRL_REQ_MASK); @@ -691,8 +687,7 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, if (hash < sram_num_entries) { dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); - bool ppe2 = airoha_ppe_is_enabled(eth, 1) && - hash >= PPE_SRAM_NUM_ENTRIES; + bool ppe2 = hash >= PPE_SRAM_NUM_ENTRIES; err = npu->ops.ppe_foe_commit_entry(npu, addr, sizeof(*hwe), hash, ppe2); From 306b78f5035af4bd011753c5a6b12812515caa6c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:17 +0200 Subject: [PATCH 169/867] net: airoha: ppe: Configure SRAM PPE entries via the cpu Introduce airoha_ppe_foe_commit_sram_entry routine in order to configure the SRAM PPE entries directly via the CPU instead of using the NPU APIs. This is a preliminary patch to enable netfilter flowtable hw offload for AN7583 SoC. 
Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-8-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 30 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 195d97e61197e..46755bc60a8e8 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -656,6 +656,27 @@ static bool airoha_ppe_foe_compare_entry(struct airoha_flow_table_entry *e, return !memcmp(&e->data.d, &hwe->d, len - sizeof(hwe->ib1)); } +static int airoha_ppe_foe_commit_sram_entry(struct airoha_ppe *ppe, u32 hash) +{ + struct airoha_foe_entry *hwe = ppe->foe + hash * sizeof(*hwe); + bool ppe2 = hash >= PPE_SRAM_NUM_ENTRIES; + u32 *ptr = (u32 *)hwe, val; + int i; + + for (i = 0; i < sizeof(*hwe) / sizeof(*ptr); i++) + airoha_fe_wr(ppe->eth, REG_PPE_RAM_ENTRY(ppe2, i), ptr[i]); + + wmb(); + airoha_fe_wr(ppe->eth, REG_PPE_RAM_CTRL(ppe2), + FIELD_PREP(PPE_SRAM_CTRL_ENTRY_MASK, hash) | + PPE_SRAM_CTRL_WR_MASK | PPE_SRAM_CTRL_REQ_MASK); + + return read_poll_timeout_atomic(airoha_fe_rr, val, + val & PPE_SRAM_CTRL_ACK_MASK, + 10, 100, false, ppe->eth, + REG_PPE_RAM_CTRL(ppe2)); +} + static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, struct airoha_foe_entry *e, u32 hash, bool rx_wlan) @@ -685,13 +706,8 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, if (!rx_wlan) airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); - if (hash < sram_num_entries) { - dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); - bool ppe2 = hash >= PPE_SRAM_NUM_ENTRIES; - - err = npu->ops.ppe_foe_commit_entry(npu, addr, sizeof(*hwe), - hash, ppe2); - } + if (hash < sram_num_entries) + err = airoha_ppe_foe_commit_sram_entry(ppe, hash); unlock: rcu_read_unlock(); From 620d7b91aadbd4eb930895b07e34f0a155a9d3c1 Mon Sep 17 
00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:18 +0200 Subject: [PATCH 170/867] net: airoha: ppe: Flush PPE SRAM table during PPE setup Rely on airoha_ppe_foe_commit_sram_entry routine to flush SRAM PPE table entries. This patch allow moving PPE SRAM flush during PPE setup and avoid dumping uninitialized values via the debugfs if no entries are offloaded yet. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-9-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 46755bc60a8e8..4b038673cefe2 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -1291,18 +1291,22 @@ static int airoha_ppe_flow_offload_cmd(struct airoha_eth *eth, return -EOPNOTSUPP; } -static int airoha_ppe_flush_sram_entries(struct airoha_ppe *ppe, - struct airoha_npu *npu) +static int airoha_ppe_flush_sram_entries(struct airoha_ppe *ppe) { u32 sram_num_entries = airoha_ppe_get_total_sram_num_entries(ppe); struct airoha_foe_entry *hwe = ppe->foe; - int i; + int i, err = 0; + + for (i = 0; i < sram_num_entries; i++) { + int err; - for (i = 0; i < PPE_SRAM_NUM_ENTRIES; i++) memset(&hwe[i], 0, sizeof(*hwe)); + err = airoha_ppe_foe_commit_sram_entry(ppe, i); + if (err) + break; + } - return npu->ops.ppe_flush_sram_entries(npu, ppe->foe_dma, - sram_num_entries); + return err; } static struct airoha_npu *airoha_ppe_npu_get(struct airoha_eth *eth) @@ -1339,10 +1343,6 @@ static int airoha_ppe_offload_setup(struct airoha_eth *eth) } airoha_ppe_hw_init(ppe); - err = airoha_ppe_flush_sram_entries(ppe, npu); - if (err) - goto error_npu_put; - airoha_ppe_foe_flow_stats_reset(ppe, npu); rcu_assign_pointer(eth->npu, npu); @@ -1513,6 +1513,10 @@ int 
airoha_ppe_init(struct airoha_eth *eth) if (!ppe->foe_check_time) return -ENOMEM; + err = airoha_ppe_flush_sram_entries(ppe); + if (err) + return err; + err = rhashtable_init(&eth->flow_table, &airoha_flow_table_params); if (err) return err; From c71a7a861ef02aa2bebb18c2f3385aa3f19094e0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:19 +0200 Subject: [PATCH 171/867] net: airoha: Select default ppe cpu port in airoha_dev_init() Select the PPE default cpu port in airoha_dev_init routine. This patch allows to distribute the load between the two available cpu ports (FE_PSE_PORT_CDM1 and FE_PSE_PORT_CDM2) if the device is running a single PPE module (e.g. 7583) selecting the cpu port based on the use QDMA device. For multi-PPE device (e.g. 7581) assign FE_PSE_PORT_CDM1 to PPE1 and FE_PSE_PORT_CDM2 to PPE2. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-10-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 38 ++++++++++-------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index dea856ddf242d..4e338c126dd3f 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -531,25 +531,6 @@ static int airoha_fe_init(struct airoha_eth *eth) /* disable IFC by default */ airoha_fe_clear(eth, REG_FE_CSR_IFC_CFG, FE_IFC_EN_MASK); - airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(0), - FIELD_PREP(DFT_CPORT_MASK(7), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(6), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(5), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(4), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(3), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(2), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(1), FE_PSE_PORT_CDM1) | - FIELD_PREP(DFT_CPORT_MASK(0), FE_PSE_PORT_CDM1)); -
airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(1), - FIELD_PREP(DFT_CPORT_MASK(7), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(6), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(5), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(4), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(3), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(2), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(1), FE_PSE_PORT_CDM2) | - FIELD_PREP(DFT_CPORT_MASK(0), FE_PSE_PORT_CDM2)); - /* enable 1:N vlan action, init vlan table */ airoha_fe_set(eth, REG_MC_VLAN_EN, MC_VLAN_EN_MASK); @@ -1756,8 +1737,10 @@ static void airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) static int airoha_dev_init(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); - struct airoha_eth *eth = port->qdma->eth; - u32 pse_port; + struct airoha_qdma *qdma = port->qdma; + struct airoha_eth *eth = qdma->eth; + u32 pse_port, fe_cpu_port; + u8 ppe_id; airoha_set_macaddr(port, dev->dev_addr); @@ -1770,16 +1753,27 @@ static int airoha_dev_init(struct net_device *dev) fallthrough; case 2: if (airoha_ppe_is_enabled(eth, 1)) { + /* For PPE2 always use secondary cpu port. */ + fe_cpu_port = FE_PSE_PORT_CDM2; pse_port = FE_PSE_PORT_PPE2; break; } fallthrough; - default: + default: { + u8 qdma_id = qdma - &eth->qdma[0]; + + /* For PPE1 select cpu port according to the running QDMA. */ + fe_cpu_port = qdma_id ? FE_PSE_PORT_CDM2 : FE_PSE_PORT_CDM1; pse_port = FE_PSE_PORT_PPE1; break; } + } airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(port->id), pse_port); + ppe_id = pse_port == FE_PSE_PORT_PPE2 ?
1 : 0; + airoha_fe_rmw(eth, REG_PPE_DFT_CPORT0(ppe_id), + DFT_CPORT_MASK(port->id), + fe_cpu_port << __ffs(DFT_CPORT_MASK(port->id))); return 0; } From 9d5b5219f672c80bed4d4e15f0068e648cdca43b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:20 +0200 Subject: [PATCH 172/867] net: airoha: Refactor src port configuration in airhoha_set_gdm2_loopback AN7583 chipset relies on different definitions for source-port identifier used for hw offloading. In order to support hw offloading in AN7583 controller, refactor src port configuration in airhoha_set_gdm2_loopback routine and introduce get_src_port_id callback. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-11-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 82 ++++++++++++++++------- drivers/net/ethernet/airoha/airoha_eth.h | 18 +++-- drivers/net/ethernet/airoha/airoha_regs.h | 6 +- 3 files changed, 73 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 4e338c126dd3f..4671f906a68c7 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1682,13 +1682,17 @@ static int airoha_dev_set_macaddr(struct net_device *dev, void *p) return 0; } -static void airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) +static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) { - u32 pse_port = port->id == 3 ? FE_PSE_PORT_GDM3 : FE_PSE_PORT_GDM4; + u32 val, pse_port, chan = port->id == AIROHA_GDM3_IDX ? 4 : 0; struct airoha_eth *eth = port->qdma->eth; - u32 chan = port->id == 3 ? 4 : 0; + /* XXX: handle XSI_USB_PORT and XSI_PCE1_PORT */ + u32 nbq = port->id == AIROHA_GDM3_IDX ? 4 : 0; + int src_port; /* Forward the traffic to the proper GDM port */ + pse_port = port->id == AIROHA_GDM3_IDX ? 
FE_PSE_PORT_GDM3 + : FE_PSE_PORT_GDM4; airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(2), pse_port); airoha_fe_clear(eth, REG_GDM_FWD_CFG(2), GDM_STRIP_CRC); @@ -1709,29 +1713,25 @@ static void airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) airoha_fe_clear(eth, REG_FE_VIP_PORT_EN, BIT(2)); airoha_fe_clear(eth, REG_FE_IFC_PORT_EN, BIT(2)); - if (port->id == 3) { - /* FIXME: handle XSI_PCE1_PORT */ - airoha_fe_rmw(eth, REG_FE_WAN_PORT, - WAN1_EN_MASK | WAN1_MASK | WAN0_MASK, - FIELD_PREP(WAN0_MASK, HSGMII_LAN_PCIE0_SRCPORT)); - airoha_fe_rmw(eth, - REG_SP_DFT_CPORT(HSGMII_LAN_PCIE0_SRCPORT >> 3), - SP_CPORT_PCIE0_MASK, - FIELD_PREP(SP_CPORT_PCIE0_MASK, - FE_PSE_PORT_CDM2)); - } else { - /* FIXME: handle XSI_USB_PORT */ + src_port = eth->soc->ops.get_src_port_id(port, nbq); + if (src_port < 0) + return src_port; + + airoha_fe_rmw(eth, REG_FE_WAN_PORT, + WAN1_EN_MASK | WAN1_MASK | WAN0_MASK, + FIELD_PREP(WAN0_MASK, src_port)); + val = src_port & SP_CPORT_DFT_MASK; + airoha_fe_rmw(eth, + REG_SP_DFT_CPORT(src_port >> fls(SP_CPORT_DFT_MASK)), + SP_CPORT_MASK(val), + FE_PSE_PORT_CDM2 << __ffs(SP_CPORT_MASK(val))); + + if (port->id != AIROHA_GDM3_IDX) airoha_fe_rmw(eth, REG_SRC_PORT_FC_MAP6, FC_ID_OF_SRC_PORT24_MASK, FIELD_PREP(FC_ID_OF_SRC_PORT24_MASK, 2)); - airoha_fe_rmw(eth, REG_FE_WAN_PORT, - WAN1_EN_MASK | WAN1_MASK | WAN0_MASK, - FIELD_PREP(WAN0_MASK, HSGMII_LAN_ETH_SRCPORT)); - airoha_fe_rmw(eth, - REG_SP_DFT_CPORT(HSGMII_LAN_ETH_SRCPORT >> 3), - SP_CPORT_ETH_MASK, - FIELD_PREP(SP_CPORT_ETH_MASK, FE_PSE_PORT_CDM2)); - } + + return 0; } static int airoha_dev_init(struct net_device *dev) @@ -1748,8 +1748,13 @@ static int airoha_dev_init(struct net_device *dev) case 3: case 4: /* If GDM2 is active we can't enable loopback */ - if (!eth->ports[1]) - airhoha_set_gdm2_loopback(port); + if (!eth->ports[1]) { + int err; + + err = airhoha_set_gdm2_loopback(port); + if (err) + return err; + } fallthrough; case 2: if (airoha_ppe_is_enabled(eth, 1)) { @@ -3069,11 
+3074,38 @@ static const char * const en7581_xsi_rsts_names[] = { "xfp-mac", }; +static int airoha_en7581_get_src_port_id(struct airoha_gdm_port *port, int nbq) +{ + switch (port->id) { + case 3: + /* 7581 SoC supports PCIe serdes on GDM3 port */ + if (nbq == 4) + return HSGMII_LAN_7581_PCIE0_SRCPORT; + if (nbq == 5) + return HSGMII_LAN_7581_PCIE1_SRCPORT; + break; + case 4: + /* 7581 SoC supports eth and usb serdes on GDM4 port */ + if (!nbq) + return HSGMII_LAN_7581_ETH_SRCPORT; + if (nbq == 1) + return HSGMII_LAN_7581_USB_SRCPORT; + break; + default: + break; + } + + return -EINVAL; +} + static const struct airoha_eth_soc_data en7581_soc_data = { .version = 0x7581, .xsi_rsts_names = en7581_xsi_rsts_names, .num_xsi_rsts = ARRAY_SIZE(en7581_xsi_rsts_names), .num_ppe = 2, + .ops = { + .get_src_port_id = airoha_en7581_get_src_port_id, + }, }; static const struct of_device_id of_airoha_match[] = { diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index df168d798699d..e09579da8c78a 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -67,10 +67,10 @@ enum { }; enum { - HSGMII_LAN_PCIE0_SRCPORT = 0x16, - HSGMII_LAN_PCIE1_SRCPORT, - HSGMII_LAN_ETH_SRCPORT, - HSGMII_LAN_USB_SRCPORT, + HSGMII_LAN_7581_PCIE0_SRCPORT = 0x16, + HSGMII_LAN_7581_PCIE1_SRCPORT, + HSGMII_LAN_7581_ETH_SRCPORT, + HSGMII_LAN_7581_USB_SRCPORT, }; enum { @@ -99,6 +99,13 @@ enum { CRSN_25 = 0x19, }; +enum airoha_gdm_index { + AIROHA_GDM1_IDX = 1, + AIROHA_GDM2_IDX = 2, + AIROHA_GDM3_IDX = 3, + AIROHA_GDM4_IDX = 4, +}; + enum { FE_PSE_PORT_CDM1, FE_PSE_PORT_GDM1, @@ -555,6 +562,9 @@ struct airoha_eth_soc_data { const char * const *xsi_rsts_names; int num_xsi_rsts; int num_ppe; + struct { + int (*get_src_port_id)(struct airoha_gdm_port *port, int nbq); + } ops; }; struct airoha_eth { diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 
69c5a143db8c0..ebcce00d9bc6f 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -383,10 +383,8 @@ #define REG_MC_VLAN_DATA 0x2108 #define REG_SP_DFT_CPORT(_n) (0x20e0 + ((_n) << 2)) -#define SP_CPORT_PCIE1_MASK GENMASK(31, 28) -#define SP_CPORT_PCIE0_MASK GENMASK(27, 24) -#define SP_CPORT_USB_MASK GENMASK(7, 4) -#define SP_CPORT_ETH_MASK GENMASK(7, 4) +#define SP_CPORT_DFT_MASK GENMASK(2, 0) +#define SP_CPORT_MASK(_n) GENMASK(3 + ((_n) << 2), ((_n) << 2)) #define REG_SRC_PORT_FC_MAP6 0x2298 #define FC_ID_OF_SRC_PORT27_MASK GENMASK(28, 24) From 63f283d36b1fb06b55ae609a1f679544f5f66057 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:21 +0200 Subject: [PATCH 173/867] net: airoha: ppe: Do not use magic numbers in airoha_ppe_foe_get_entry_locked() Explicit the size of entries pointed by hwe pointer in airoha_ppe_foe_get_entry_locked routine Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-12-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 4b038673cefe2..eda95107cd1da 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -619,7 +619,8 @@ airoha_ppe_foe_get_entry_locked(struct airoha_ppe *ppe, u32 hash) REG_PPE_RAM_CTRL(ppe2))) return NULL; - for (i = 0; i < sizeof(struct airoha_foe_entry) / 4; i++) + for (i = 0; i < sizeof(struct airoha_foe_entry) / sizeof(*hwe); + i++) hwe[i] = airoha_fe_rr(eth, REG_PPE_RAM_ENTRY(ppe2, i)); } From e4e5ce823bdd4601bd75ae7c206ae35e7c2fa60b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 17 Oct 2025 11:06:22 +0200 Subject: [PATCH 174/867] net: airoha: Add AN7583 SoC support Introduce support for AN7583 ethernet controller to 
airoha-eth driver. Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251017-an7583-eth-support-v3-13-f28319666667@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 68 ++++++++++++++++++++++-- drivers/net/ethernet/airoha/airoha_eth.h | 11 ++++ drivers/net/ethernet/airoha/airoha_ppe.c | 3 ++ 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 4671f906a68c7..8483ea02603e2 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1684,10 +1684,8 @@ static int airoha_dev_set_macaddr(struct net_device *dev, void *p) static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) { - u32 val, pse_port, chan = port->id == AIROHA_GDM3_IDX ? 4 : 0; struct airoha_eth *eth = port->qdma->eth; - /* XXX: handle XSI_USB_PORT and XSI_PCE1_PORT */ - u32 nbq = port->id == AIROHA_GDM3_IDX ? 4 : 0; + u32 val, pse_port, chan, nbq; int src_port; /* Forward the traffic to the proper GDM port */ @@ -1699,6 +1697,8 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) /* Enable GDM2 loopback */ airoha_fe_wr(eth, REG_GDM_TXCHN_EN(2), 0xffffffff); airoha_fe_wr(eth, REG_GDM_RXCHN_EN(2), 0xffff); + + chan = port->id == AIROHA_GDM3_IDX ? airoha_is_7581(eth) ? 4 : 3 : 0; airoha_fe_rmw(eth, REG_GDM_LPBK_CFG(2), LPBK_CHAN_MASK | LPBK_MODE_MASK | LPBK_EN_MASK, FIELD_PREP(LPBK_CHAN_MASK, chan) | @@ -1713,6 +1713,8 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) airoha_fe_clear(eth, REG_FE_VIP_PORT_EN, BIT(2)); airoha_fe_clear(eth, REG_FE_IFC_PORT_EN, BIT(2)); + /* XXX: handle XSI_USB_PORT and XSI_PCE1_PORT */ + nbq = port->id == AIROHA_GDM3_IDX && airoha_is_7581(eth) ? 
4 : 0; src_port = eth->soc->ops.get_src_port_id(port, nbq); if (src_port < 0) return src_port; @@ -1726,7 +1728,7 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) SP_CPORT_MASK(val), FE_PSE_PORT_CDM2 << __ffs(SP_CPORT_MASK(val))); - if (port->id != AIROHA_GDM3_IDX) + if (port->id != AIROHA_GDM3_IDX && airoha_is_7581(eth)) airoha_fe_rmw(eth, REG_SRC_PORT_FC_MAP6, FC_ID_OF_SRC_PORT24_MASK, FIELD_PREP(FC_ID_OF_SRC_PORT24_MASK, 2)); @@ -1895,6 +1897,22 @@ static bool airoha_dev_tx_queue_busy(struct airoha_queue *q, u32 nr_frags) return index >= tail; } +static int airoha_get_fe_port(struct airoha_gdm_port *port) +{ + struct airoha_qdma *qdma = port->qdma; + struct airoha_eth *eth = qdma->eth; + + switch (eth->soc->version) { + case 0x7583: + return port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 + : port->id; + case 0x7581: + default: + return port->id == AIROHA_GDM4_IDX ? FE_PSE_PORT_GDM4 + : port->id; + } +} + static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1935,7 +1953,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, } } - fport = port->id == 4 ? 
FE_PSE_PORT_GDM4 : port->id; + fport = airoha_get_fe_port(port); msg1 = FIELD_PREP(QDMA_ETH_TXMSG_FPORT_MASK, fport) | FIELD_PREP(QDMA_ETH_TXMSG_METER_MASK, 0x7f); @@ -3098,6 +3116,35 @@ static int airoha_en7581_get_src_port_id(struct airoha_gdm_port *port, int nbq) return -EINVAL; } +static const char * const an7583_xsi_rsts_names[] = { + "xsi-mac", + "hsi0-mac", + "hsi1-mac", + "xfp-mac", +}; + +static int airoha_an7583_get_src_port_id(struct airoha_gdm_port *port, int nbq) +{ + switch (port->id) { + case 3: + /* 7583 SoC supports eth serdes on GDM3 port */ + if (!nbq) + return HSGMII_LAN_7583_ETH_SRCPORT; + break; + case 4: + /* 7583 SoC supports PCIe and USB serdes on GDM4 port */ + if (!nbq) + return HSGMII_LAN_7583_PCIE_SRCPORT; + if (nbq == 1) + return HSGMII_LAN_7583_USB_SRCPORT; + break; + default: + break; + } + + return -EINVAL; +} + static const struct airoha_eth_soc_data en7581_soc_data = { .version = 0x7581, .xsi_rsts_names = en7581_xsi_rsts_names, @@ -3108,8 +3155,19 @@ static const struct airoha_eth_soc_data en7581_soc_data = { }, }; +static const struct airoha_eth_soc_data an7583_soc_data = { + .version = 0x7583, + .xsi_rsts_names = an7583_xsi_rsts_names, + .num_xsi_rsts = ARRAY_SIZE(an7583_xsi_rsts_names), + .num_ppe = 1, + .ops = { + .get_src_port_id = airoha_an7583_get_src_port_id, + }, +}; + static const struct of_device_id of_airoha_match[] = { { .compatible = "airoha,en7581-eth", .data = &en7581_soc_data }, + { .compatible = "airoha,an7583-eth", .data = &an7583_soc_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, of_airoha_match); diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index e09579da8c78a..eb27a4ff51984 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -73,6 +73,12 @@ enum { HSGMII_LAN_7581_USB_SRCPORT, }; +enum { + HSGMII_LAN_7583_ETH_SRCPORT = 0x16, + HSGMII_LAN_7583_PCIE_SRCPORT = 0x18, + HSGMII_LAN_7583_USB_SRCPORT, +}; + 
enum { XSI_PCIE0_VIP_PORT_MASK = BIT(22), XSI_PCIE1_VIP_PORT_MASK = BIT(23), @@ -629,6 +635,11 @@ static inline bool airoha_is_7581(struct airoha_eth *eth) return eth->soc->version == 0x7581; } +static inline bool airoha_is_7583(struct airoha_eth *eth) +{ + return eth->soc->version == 0x7583; +} + bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index eda95107cd1da..c373f21d95f5a 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -37,6 +37,9 @@ static int airoha_ppe_get_num_stats_entries(struct airoha_ppe *ppe) if (!IS_ENABLED(CONFIG_NET_AIROHA_FLOW_STATS)) return -EOPNOTSUPP; + if (airoha_is_7583(ppe->eth)) + return -EOPNOTSUPP; + return PPE_STATS_NUM_ENTRIES; } From 3ff9bcecce83f12169ab3e42671bd76554ca521a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Oct 2025 13:37:12 +0000 Subject: [PATCH 175/867] net: avoid extra access to sk->sk_wmem_alloc in sock_wfree() UDP TX packets destructor is sock_wfree(). It suffers from a cache line bouncing in sock_def_write_space_wfree(). Instead of reading sk->sk_wmem_alloc after we just did an atomic RMW on it, use __refcount_sub_and_test() to get the old value for free, and pass the new value to sock_def_write_space_wfree(). Add __sock_writeable() helper. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251017133712.2842665-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 6 +++++- net/core/sock.c | 14 ++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 5c564f114ae95..01ce231603db0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2607,12 +2607,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); +static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc) +{ + return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1); +} /* * Default write policy as shown to user space via poll/select/SIGIO */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); + return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc)); } static inline gfp_t gfp_any(void) diff --git a/net/core/sock.c b/net/core/sock.c index b78533fb92686..a99132cc09656 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -155,7 +155,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -static void sock_def_write_space_wfree(struct sock *sk); +static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc); static void sock_def_write_space(struct sock *sk); /** @@ -2659,16 +2659,18 @@ EXPORT_SYMBOL_GPL(sk_setup_caps); */ void sock_wfree(struct sk_buff *skb) { - struct sock *sk = skb->sk; unsigned int len = skb->truesize; + struct sock *sk = skb->sk; bool free; + int old; if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { if (sock_flag(sk, SOCK_RCU_FREE) && sk->sk_write_space == sock_def_write_space) { rcu_read_lock(); - free = refcount_sub_and_test(len, &sk->sk_wmem_alloc); - sock_def_write_space_wfree(sk); + free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc, + &old); + sock_def_write_space_wfree(sk, old - len); 
rcu_read_unlock(); if (unlikely(free)) __sk_free(sk); @@ -3612,12 +3614,12 @@ static void sock_def_write_space(struct sock *sk) * for SOCK_RCU_FREE sockets under RCU read section and after putting * ->sk_wmem_alloc. */ -static void sock_def_write_space_wfree(struct sock *sk) +static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc) { /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if (sock_writeable(sk)) { + if (__sock_writeable(sk, wmem_alloc)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); /* rely on refcount_sub from sock_wfree() */ From 0364ca33097da247ef6539c765405657324f243e Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 20 Oct 2025 10:09:13 -0700 Subject: [PATCH 176/867] devlink: region: correct port region lookup to use port_ops The function devlink_port_region_get_by_name() incorrectly uses region->ops->name to compare the region name. as it is not any critical impact as ops and port_ops define as union for devlink_region but as per code logic it should refer port_ops here. No functional impact as ops and port_ops are part of same union, and name is the first member of both. Update it to use region->port_ops->name to properly reference the name of the devlink port region. 
Signed-off-by: Alok Tiwari Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251020170916.1741808-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- net/devlink/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/region.c b/net/devlink/region.c index 63fb297f6d678..d6e5805cf3a09 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -50,7 +50,7 @@ devlink_port_region_get_by_name(struct devlink_port *port, struct devlink_region *region; list_for_each_entry(region, &port->region_list, list) - if (!strcmp(region->ops->name, region_name)) + if (!strcmp(region->port_ops->name, region_name)) return region; return NULL; From 28098defc79fe7d29e6bfe4eb6312991f6bdc3d3 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 17 Oct 2025 03:41:52 +0000 Subject: [PATCH 177/867] net: add a common function to compute features for upper devices Some high level software drivers need to compute features from lower devices. But each has their own implementations and may miss some feature computations. Let's use one common function to compute features for these kinds of devices. The new helper uses the current bond implementation as the reference one, as the latter already handles all the relevant aspects: netdev features, TSO limits and dst retention. 
Suggested-by: Paolo Abeni Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251017034155.61990-2-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 18 +++++++ include/linux/netdevice.h | 1 + net/core/dev.c | 88 +++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7a01c518e5730..93e4da7046a10 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -255,6 +255,24 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) +/* virtual device features */ +#define MASTER_UPPER_DEV_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ + NETIF_F_GSO_ENCAP_ALL | \ + NETIF_F_HIGHDMA | NETIF_F_LRO) + +#define MASTER_UPPER_DEV_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \ + NETIF_F_GSO_PARTIAL) + +#define MASTER_UPPER_DEV_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_GSO_SOFTWARE) + +#define MASTER_UPPER_DEV_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ + NETIF_F_GSO_ESP) + +#define MASTER_UPPER_DEV_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP) + static inline netdev_features_t netdev_base_features(netdev_features_t features) { features &= ~NETIF_F_ONE_FOR_ALL; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d1a687444b275..7f5aad5cc9a19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5304,6 +5304,7 @@ static inline netdev_features_t netdev_add_tso_features(netdev_features_t featur int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netdev_change_features(struct net_device *dev); +void netdev_compute_master_upper_features(struct net_device *dev, bool 
update_header); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 9482b905c66a5..378c2d010faf2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -12693,6 +12693,94 @@ netdev_features_t netdev_increment_features(netdev_features_t all, } EXPORT_SYMBOL(netdev_increment_features); +/** + * netdev_compute_master_upper_features - compute feature from lowers + * @dev: the upper device + * @update_header: whether to update upper device's header_len/headroom/tailroom + * + * Recompute the upper device's feature based on all lower devices. + */ +void netdev_compute_master_upper_features(struct net_device *dev, bool update_header) +{ + unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; + netdev_features_t gso_partial_features = MASTER_UPPER_DEV_GSO_PARTIAL_FEATURES; + netdev_features_t xfrm_features = MASTER_UPPER_DEV_XFRM_FEATURES; + netdev_features_t mpls_features = MASTER_UPPER_DEV_MPLS_FEATURES; + netdev_features_t vlan_features = MASTER_UPPER_DEV_VLAN_FEATURES; + netdev_features_t enc_features = MASTER_UPPER_DEV_ENC_FEATURES; + unsigned short max_header_len = ETH_HLEN; + unsigned int tso_max_size = TSO_MAX_SIZE; + unsigned short max_headroom = 0; + unsigned short max_tailroom = 0; + u16 tso_max_segs = TSO_MAX_SEGS; + struct net_device *lower_dev; + struct list_head *iter; + + mpls_features = netdev_base_features(mpls_features); + vlan_features = netdev_base_features(vlan_features); + enc_features = netdev_base_features(enc_features); + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + gso_partial_features = netdev_increment_features(gso_partial_features, + lower_dev->gso_partial_features, + MASTER_UPPER_DEV_GSO_PARTIAL_FEATURES); + + vlan_features = netdev_increment_features(vlan_features, + lower_dev->vlan_features, + MASTER_UPPER_DEV_VLAN_FEATURES); + + enc_features = netdev_increment_features(enc_features, + lower_dev->hw_enc_features, 
+ MASTER_UPPER_DEV_ENC_FEATURES); + + if (IS_ENABLED(CONFIG_XFRM_OFFLOAD)) + xfrm_features = netdev_increment_features(xfrm_features, + lower_dev->hw_enc_features, + MASTER_UPPER_DEV_XFRM_FEATURES); + + mpls_features = netdev_increment_features(mpls_features, + lower_dev->mpls_features, + MASTER_UPPER_DEV_MPLS_FEATURES); + + dst_release_flag &= lower_dev->priv_flags; + + if (update_header) { + max_header_len = max(max_header_len, lower_dev->hard_header_len); + max_headroom = max(max_headroom, lower_dev->needed_headroom); + max_tailroom = max(max_tailroom, lower_dev->needed_tailroom); + } + + tso_max_size = min(tso_max_size, lower_dev->tso_max_size); + tso_max_segs = min(tso_max_segs, lower_dev->tso_max_segs); + } + + dev->gso_partial_features = gso_partial_features; + dev->vlan_features = vlan_features; + dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + if (IS_ENABLED(CONFIG_XFRM_OFFLOAD)) + dev->hw_enc_features |= xfrm_features; + dev->mpls_features = mpls_features; + + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + if ((dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) && + dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM)) + dev->priv_flags |= IFF_XMIT_DST_RELEASE; + + if (update_header) { + dev->hard_header_len = max_header_len; + dev->needed_headroom = max_headroom; + dev->needed_tailroom = max_tailroom; + } + + netif_set_tso_max_segs(dev, tso_max_segs); + netif_set_tso_max_size(dev, tso_max_size); + + netdev_change_features(dev); +} +EXPORT_SYMBOL(netdev_compute_master_upper_features); + static struct hlist_head * __net_init netdev_create_hash(void) { int i; From d4fde269a970666a30dd3abd0413273a06dd972d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 17 Oct 2025 03:41:53 +0000 Subject: [PATCH 178/867] bonding: use common function to compute the features Use the new function netdev_compute_master_upper_features() to compute the bonding features. 
Note that bond_compute_features() currently uses bond_for_each_slave() to traverse the lower devices list, and that is just a macro wrapper of netdev_for_each_lower_private(). We use a similar helper netdev_for_each_lower_dev() in netdev_compute_master_upper_features() to iterate over the slave devices, as there is no need to get the private data. No functional change intended. Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251017034155.61990-3-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 99 ++------------------------------- 1 file changed, 4 insertions(+), 95 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4da619210c1fa..cd7da6ed8c6bb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1468,97 +1468,6 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } -#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ - NETIF_F_GSO_ENCAP_ALL | \ - NETIF_F_HIGHDMA | NETIF_F_LRO) - -#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \ - NETIF_F_GSO_PARTIAL) - -#define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_GSO_SOFTWARE) - -#define BOND_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP) - - -static void bond_compute_features(struct bonding *bond) -{ - netdev_features_t gso_partial_features = BOND_GSO_PARTIAL_FEATURES; - unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | - IFF_XMIT_DST_RELEASE_PERM; - netdev_features_t vlan_features = BOND_VLAN_FEATURES; - netdev_features_t enc_features = BOND_ENC_FEATURES; -#ifdef CONFIG_XFRM_OFFLOAD - netdev_features_t xfrm_features = BOND_XFRM_FEATURES; -#endif /* CONFIG_XFRM_OFFLOAD */ - netdev_features_t mpls_features = BOND_MPLS_FEATURES; - struct net_device *bond_dev = bond->dev; - struct list_head 
*iter; - struct slave *slave; - unsigned short max_hard_header_len = ETH_HLEN; - unsigned int tso_max_size = TSO_MAX_SIZE; - u16 tso_max_segs = TSO_MAX_SEGS; - - if (!bond_has_slaves(bond)) - goto done; - - vlan_features = netdev_base_features(vlan_features); - mpls_features = netdev_base_features(mpls_features); - - bond_for_each_slave(bond, slave, iter) { - vlan_features = netdev_increment_features(vlan_features, - slave->dev->vlan_features, BOND_VLAN_FEATURES); - - enc_features = netdev_increment_features(enc_features, - slave->dev->hw_enc_features, - BOND_ENC_FEATURES); - -#ifdef CONFIG_XFRM_OFFLOAD - xfrm_features = netdev_increment_features(xfrm_features, - slave->dev->hw_enc_features, - BOND_XFRM_FEATURES); -#endif /* CONFIG_XFRM_OFFLOAD */ - - gso_partial_features = netdev_increment_features(gso_partial_features, - slave->dev->gso_partial_features, - BOND_GSO_PARTIAL_FEATURES); - - mpls_features = netdev_increment_features(mpls_features, - slave->dev->mpls_features, - BOND_MPLS_FEATURES); - - dst_release_flag &= slave->dev->priv_flags; - if (slave->dev->hard_header_len > max_hard_header_len) - max_hard_header_len = slave->dev->hard_header_len; - - tso_max_size = min(tso_max_size, slave->dev->tso_max_size); - tso_max_segs = min(tso_max_segs, slave->dev->tso_max_segs); - } - bond_dev->hard_header_len = max_hard_header_len; - -done: - bond_dev->gso_partial_features = gso_partial_features; - bond_dev->vlan_features = vlan_features; - bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; -#ifdef CONFIG_XFRM_OFFLOAD - bond_dev->hw_enc_features |= xfrm_features; -#endif /* CONFIG_XFRM_OFFLOAD */ - bond_dev->mpls_features = mpls_features; - netif_set_tso_max_segs(bond_dev, tso_max_segs); - netif_set_tso_max_size(bond_dev, tso_max_size); - - bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) && - dst_release_flag == (IFF_XMIT_DST_RELEASE | 
IFF_XMIT_DST_RELEASE_PERM)) - bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE; - - netdev_change_features(bond_dev); -} - static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -2273,7 +2182,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, } bond->slave_cnt++; - bond_compute_features(bond); + netdev_compute_master_upper_features(bond->dev, true); bond_set_carrier(bond); /* Needs to be called before bond_select_active_slave(), which will @@ -2525,7 +2434,7 @@ static int __bond_release_one(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); } - bond_compute_features(bond); + netdev_compute_master_upper_features(bond->dev, true); if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && (old_features & NETIF_F_VLAN_CHALLENGED)) slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n"); @@ -4028,7 +3937,7 @@ static int bond_slave_netdev_event(unsigned long event, case NETDEV_FEAT_CHANGE: if (!bond->notifier_ctx) { bond->notifier_ctx = true; - bond_compute_features(bond); + netdev_compute_master_upper_features(bond->dev, true); bond->notifier_ctx = false; } break; @@ -6011,7 +5920,7 @@ void bond_setup(struct net_device *bond_dev) * capable */ - bond_dev->hw_features = BOND_VLAN_FEATURES | + bond_dev->hw_features = MASTER_UPPER_DEV_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_RX | From 745cd46c2a47144dd656185b9be0a1e5e9b02d2d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 17 Oct 2025 03:41:54 +0000 Subject: [PATCH 179/867] team: use common function to compute the features Use the new helper netdev_compute_master_upper_features() to compute the team device features. This helper performs both the feature computation and the netdev_change_features() call. Note that such change replace the lower layer traversing currently done using team->port_list with netdev_for_each_lower_dev(). 
Such change is safe as `port_list` contains exactly the same elements as `team->dev->adj_list.lower` and the helper is always invoked under the RTNL lock. With this change, the explicit netdev_change_features() in team_add_slave() can be safely removed, as team_port_add() already takes care of the notification via netdev_compute_master_upper_features(), and same thing for team_del_slave() This also fixes missing computations for MPLS, XFRM, and TSO/GSO partial features. Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251017034155.61990-4-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 83 +++--------------------------------- 1 file changed, 6 insertions(+), 77 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 17f07eb0ee52a..29dc04c299a3b 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -982,63 +982,6 @@ static void team_port_disable(struct team *team, team_lower_state_changed(port); } -#define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ - NETIF_F_HIGHDMA | NETIF_F_LRO | \ - NETIF_F_GSO_ENCAP_ALL) - -#define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE) - -static void __team_compute_features(struct team *team) -{ - struct team_port *port; - netdev_features_t vlan_features = TEAM_VLAN_FEATURES; - netdev_features_t enc_features = TEAM_ENC_FEATURES; - unsigned short max_hard_header_len = ETH_HLEN; - unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | - IFF_XMIT_DST_RELEASE_PERM; - - rcu_read_lock(); - if (list_empty(&team->port_list)) - goto done; - - vlan_features = netdev_base_features(vlan_features); - enc_features = netdev_base_features(enc_features); - - list_for_each_entry_rcu(port, &team->port_list, list) { - vlan_features = netdev_increment_features(vlan_features, - 
port->dev->vlan_features, - TEAM_VLAN_FEATURES); - enc_features = - netdev_increment_features(enc_features, - port->dev->hw_enc_features, - TEAM_ENC_FEATURES); - - dst_release_flag &= port->dev->priv_flags; - if (port->dev->hard_header_len > max_hard_header_len) - max_hard_header_len = port->dev->hard_header_len; - } -done: - rcu_read_unlock(); - - team->dev->vlan_features = vlan_features; - team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - team->dev->hard_header_len = max_hard_header_len; - - team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM)) - team->dev->priv_flags |= IFF_XMIT_DST_RELEASE; -} - -static void team_compute_features(struct team *team) -{ - __team_compute_features(team); - netdev_change_features(team->dev); -} - static int team_port_enter(struct team *team, struct team_port *port) { int err = 0; @@ -1300,7 +1243,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, port->index = -1; list_add_tail_rcu(&port->list, &team->port_list); team_port_enable(team, port); - __team_compute_features(team); + netdev_compute_master_upper_features(team->dev, true); __team_port_change_port_added(port, !!netif_oper_up(port_dev)); __team_options_change_check(team); @@ -1382,7 +1325,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) dev_set_mtu(port_dev, port->orig.mtu); kfree_rcu(port, rcu); netdev_info(dev, "Port device %s removed\n", portname); - __team_compute_features(team); + netdev_compute_master_upper_features(team->dev, true); return 0; } @@ -1970,33 +1913,19 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { struct team *team = netdev_priv(dev); - int err; ASSERT_RTNL(); - err = team_port_add(team, port_dev, extack); - - if (!err) - netdev_change_features(dev); - - return err; + return 
team_port_add(team, port_dev, extack); } static int team_del_slave(struct net_device *dev, struct net_device *port_dev) { struct team *team = netdev_priv(dev); - int err; ASSERT_RTNL(); - err = team_port_del(team, port_dev); - - if (err) - return err; - - netdev_change_features(dev); - - return err; + return team_port_del(team, port_dev); } static netdev_features_t team_fix_features(struct net_device *dev, @@ -2190,7 +2119,7 @@ static void team_setup(struct net_device *dev) dev->features |= NETIF_F_GRO; - dev->hw_features = TEAM_VLAN_FEATURES | + dev->hw_features = MASTER_UPPER_DEV_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_RX | @@ -2994,7 +2923,7 @@ static int team_device_event(struct notifier_block *unused, case NETDEV_FEAT_CHANGE: if (!port->team->notifier_ctx) { port->team->notifier_ctx = true; - team_compute_features(port->team); + netdev_compute_master_upper_features(port->team->dev, true); port->team->notifier_ctx = false; } break; From 0152747a528a185182bdb2cab973848a52d715ac Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 17 Oct 2025 03:41:55 +0000 Subject: [PATCH 180/867] net: bridge: use common function to compute the features Previously, bridge ignored all features propagation and DST retention, only handling explicitly the GSO limits. By switching to the new helper netdev_compute_master_upper_features(), the bridge now exposes additional features, depending on the lowers' capabilities. Since br_set_gso_limits() is already covered by the helper, it can be removed safely. Bridge has its own way to update needed_headroom. So we don't need to update it in the helper. 
Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251017034155.61990-5-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_if.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 98c5b9c3145f3..a6d4c44890fd6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -525,20 +525,6 @@ void br_mtu_auto_adjust(struct net_bridge *br) br_opt_toggle(br, BROPT_MTU_SET_BY_USER, false); } -static void br_set_gso_limits(struct net_bridge *br) -{ - unsigned int tso_max_size = TSO_MAX_SIZE; - const struct net_bridge_port *p; - u16 tso_max_segs = TSO_MAX_SEGS; - - list_for_each_entry(p, &br->port_list, list) { - tso_max_size = min(tso_max_size, p->dev->tso_max_size); - tso_max_segs = min(tso_max_segs, p->dev->tso_max_segs); - } - netif_set_tso_max_size(br->dev, tso_max_size); - netif_set_tso_max_segs(br->dev, tso_max_segs); -} - /* * Recomputes features using slave's features */ @@ -652,8 +638,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n"); } - netdev_update_features(br->dev); - br_hr = br->dev->needed_headroom; dev_hr = netdev_get_fwd_headroom(dev); if (br_hr < dev_hr) @@ -694,7 +678,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); br_mtu_auto_adjust(br); - br_set_gso_limits(br); + + netdev_compute_master_upper_features(br->dev, false); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -740,7 +725,6 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) del_nbp(p); br_mtu_auto_adjust(br); - br_set_gso_limits(br); spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); @@ -749,7 +733,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (changed_addr) call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); - 
netdev_update_features(br->dev); + netdev_compute_master_upper_features(br->dev, false); return 0; } From a9dff2b5f72b5ec21cec8e754a4fc2d1c03fd0a1 Mon Sep 17 00:00:00 2001 From: David Yang Date: Fri, 17 Oct 2025 14:08:53 +0800 Subject: [PATCH 181/867] dt-bindings: net: dsa: yt921x: Add Motorcomm YT921x switch support The Motorcomm YT921x series is a family of Ethernet switches with up to 8 internal GbE PHYs and up to 2 GMACs. Signed-off-by: David Yang Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20251017060859.326450-2-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/motorcomm,yt921x.yaml | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/motorcomm,yt921x.yaml diff --git a/Documentation/devicetree/bindings/net/dsa/motorcomm,yt921x.yaml b/Documentation/devicetree/bindings/net/dsa/motorcomm,yt921x.yaml new file mode 100644 index 0000000000000..33a6552e46fc0 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/motorcomm,yt921x.yaml @@ -0,0 +1,167 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/motorcomm,yt921x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Motorcomm YT921x Ethernet switch family + +maintainers: + - David Yang + +description: | + The Motorcomm YT921x series is a family of Ethernet switches with up to 8 + internal GbE PHYs and up to 2 GMACs, including: + + - YT9215S / YT9215RB / YT9215SC: 5 GbE PHYs (Port 0-4) + 2 GMACs (Port 8-9) + - YT9213NB: 2 GbE PHYs (Port 1/3) + 1 GMAC (Port 9) + - YT9214NB: 2 GbE PHYs (Port 1/3) + 2 GMACs (Port 8-9) + - YT9218N: 8 GbE PHYs (Port 0-7) + - YT9218MB: 8 GbE PHYs (Port 0-7) + 2 GMACs (Port 8-9) + + Any port can be used as the CPU port. 
+ +properties: + compatible: + const: motorcomm,yt9215 + + reg: + enum: [0x0, 0x1d] + + reset-gpios: + maxItems: 1 + + mdio: + $ref: /schemas/net/mdio.yaml# + unevaluatedProperties: false + description: + Internal MDIO bus for the internal GbE PHYs. PHY 0-7 are used for Port + 0-7 respectively. + + mdio-external: + $ref: /schemas/net/mdio.yaml# + unevaluatedProperties: false + description: + External MDIO bus to access external components. External PHYs for GMACs + (Port 8-9) are expected to be connected to the external MDIO bus in + vendor's reference design, but that is not a hard limitation from the + chip. + +required: + - compatible + - reg + +allOf: + - $ref: dsa.yaml#/$defs/ethernet-ports + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch@1d { + compatible = "motorcomm,yt9215"; + /* default 0x1d, alternate 0x0 */ + reg = <0x1d>; + reset-gpios = <&tlmm 39 GPIO_ACTIVE_LOW>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + sw_phy0: phy@0 { + reg = <0x0>; + }; + + sw_phy1: phy@1 { + reg = <0x1>; + }; + + sw_phy2: phy@2 { + reg = <0x2>; + }; + + sw_phy3: phy@3 { + reg = <0x3>; + }; + + sw_phy4: phy@4 { + reg = <0x4>; + }; + }; + + mdio-external { + #address-cells = <1>; + #size-cells = <0>; + + phy1: phy@b { + reg = <0xb>; + }; + }; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-port@0 { + reg = <0>; + label = "lan1"; + phy-mode = "internal"; + phy-handle = <&sw_phy0>; + }; + + ethernet-port@1 { + reg = <1>; + label = "lan2"; + phy-mode = "internal"; + phy-handle = <&sw_phy1>; + }; + + ethernet-port@2 { + reg = <2>; + label = "lan3"; + phy-mode = "internal"; + phy-handle = <&sw_phy2>; + }; + + ethernet-port@3 { + reg = <3>; + label = "lan4"; + phy-mode = "internal"; + phy-handle = <&sw_phy3>; + }; + + ethernet-port@4 { + reg = <4>; + label = "lan5"; + phy-mode = "internal"; + phy-handle = <&sw_phy4>; + }; + + /* CPU port */ + ethernet-port@8 {
+ reg = <8>; + phy-mode = "2500base-x"; + ethernet = <&eth0>; + + fixed-link { + speed = <2500>; + full-duplex; + }; + }; + + /* if external phy is connected to a MAC */ + ethernet-port@9 { + reg = <9>; + label = "wan"; + phy-mode = "rgmii-id"; + phy-handle = <&phy1>; + }; + }; + }; + }; From ca4709843b7e72f96976cd6b35bca148a4071673 Mon Sep 17 00:00:00 2001 From: David Yang Date: Fri, 17 Oct 2025 14:08:54 +0800 Subject: [PATCH 182/867] net: dsa: tag_yt921x: add support for Motorcomm YT921x tags Add support for Motorcomm YT921x tags, which includes a proper configurable ethertype field (default to 0x9988). Signed-off-by: David Yang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251017060859.326450-3-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 + include/uapi/linux/if_ether.h | 1 + net/dsa/Kconfig | 6 ++ net/dsa/Makefile | 1 + net/dsa/tag_yt921x.c | 141 ++++++++++++++++++++++++++++++++++ 5 files changed, 151 insertions(+) create mode 100644 net/dsa/tag_yt921x.c diff --git a/include/net/dsa.h b/include/net/dsa.h index d73ea08800660..67762fdaf3c7a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -55,6 +55,7 @@ struct tc_action; #define DSA_TAG_PROTO_LAN937X_VALUE 27 #define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 #define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29 +#define DSA_TAG_PROTO_YT921X_VALUE 30 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -87,6 +88,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, + DSA_TAG_PROTO_YT921X = DSA_TAG_PROTO_YT921X_VALUE, }; struct dsa_switch; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 69e0457eb2000..cfd200c87e5ea 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -114,6 +114,7 @@ #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN
OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_YT921X 0x9988 /* Motorcomm YT921x DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_A5PSW 0xE001 /* A5PSW Tag Value [ NOT AN OFFICIALLY REGISTERED ID ] */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 869cbe57162f9..6b94028b1fcc2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -190,4 +190,10 @@ config NET_DSA_TAG_XRS700X Say Y or M if you want to enable support for tagging frames for Arrow SpeedChips XRS700x switches that use a single byte tag trailer. +config NET_DSA_TAG_YT921X + tristate "Tag driver for Motorcomm YT921x switches" + help + Say Y or M if you want to enable support for tagging frames for + Motorcomm YT921x switches. + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 555c07cfeb712..4b011a1d5c87e 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o obj-$(CONFIG_NET_DSA_TAG_VSC73XX_8021Q) += tag_vsc73xx_8021q.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o +obj-$(CONFIG_NET_DSA_TAG_YT921X) += tag_yt921x.o # for tracing framework to find trace.h CFLAGS_trace.o := -I$(src) diff --git a/net/dsa/tag_yt921x.c b/net/dsa/tag_yt921x.c new file mode 100644 index 0000000000000..995da44f0a2a1 --- /dev/null +++ b/net/dsa/tag_yt921x.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Motorcomm YT921x Switch Extended CPU Port Tagging + * + * Copyright (c) 2025 David Yang + * + * +----+----+-------+-----+----+--------- + * | DA | SA | TagET | Tag | ET | Payload ... 
+ * +----+----+-------+-----+----+--------- + * 6 6 2 6 2 N + * + * Tag Ethertype: CPU_TAG_TPID_TPID (default: ETH_P_YT921X = 0x9988) + * * Hardcoded for the moment, but still configurable. Discuss it if there + * are conflicts somewhere and/or you want to change it for some reason. + * Tag: + * 2: VLAN Tag + * 2: Rx Port + * 15b: Rx Port Valid + * 14b-11b: Rx Port + * 10b-0b: Cmd? + * 2: Tx Port(s) + * 15b: Tx Port(s) Valid + * 10b-0b: Tx Port(s) Mask + */ + +#include + +#include "tag.h" + +#define YT921X_TAG_NAME "yt921x" + +#define YT921X_TAG_LEN 8 + +#define YT921X_TAG_PORT_EN BIT(15) +#define YT921X_TAG_RX_PORT_M GENMASK(14, 11) +#define YT921X_TAG_RX_CMD_M GENMASK(10, 0) +#define YT921X_TAG_RX_CMD(x) FIELD_PREP(YT921X_TAG_RX_CMD_M, (x)) +#define YT921X_TAG_RX_CMD_FORWARDED 0x80 +#define YT921X_TAG_RX_CMD_UNK_UCAST 0xb2 +#define YT921X_TAG_RX_CMD_UNK_MCAST 0xb4 +#define YT921X_TAG_TX_PORTS_M GENMASK(10, 0) +#define YT921X_TAG_TX_PORTn(port) BIT(port) + +static struct sk_buff * +yt921x_tag_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct dsa_port *dp = dsa_user_to_port(netdev); + unsigned int port = dp->index; + __be16 *tag; + u16 tx; + + skb_push(skb, YT921X_TAG_LEN); + dsa_alloc_etype_header(skb, YT921X_TAG_LEN); + + tag = dsa_etype_header_pos_tx(skb); + + tag[0] = htons(ETH_P_YT921X); + /* VLAN tag unrelated when TX */ + tag[1] = 0; + tag[2] = 0; + tx = YT921X_TAG_PORT_EN | YT921X_TAG_TX_PORTn(port); + tag[3] = htons(tx); + + return skb; +} + +static struct sk_buff * +yt921x_tag_rcv(struct sk_buff *skb, struct net_device *netdev) +{ + unsigned int port; + __be16 *tag; + u16 cmd; + u16 rx; + + if (unlikely(!pskb_may_pull(skb, YT921X_TAG_LEN))) + return NULL; + + tag = dsa_etype_header_pos_rx(skb); + + if (unlikely(tag[0] != htons(ETH_P_YT921X))) { + dev_warn_ratelimited(&netdev->dev, + "Unexpected EtherType 0x%04x\n", + ntohs(tag[0])); + return NULL; + } + + /* Locate which port this is coming from */ + rx = ntohs(tag[2]); + if (unlikely((rx & 
YT921X_TAG_PORT_EN) == 0)) { + dev_warn_ratelimited(&netdev->dev, + "Unexpected rx tag 0x%04x\n", rx); + return NULL; + } + + port = FIELD_GET(YT921X_TAG_RX_PORT_M, rx); + skb->dev = dsa_conduit_find_user(netdev, 0, port); + if (unlikely(!skb->dev)) { + dev_warn_ratelimited(&netdev->dev, + "Couldn't decode source port %u\n", port); + return NULL; + } + + cmd = FIELD_GET(YT921X_TAG_RX_CMD_M, rx); + switch (cmd) { + case YT921X_TAG_RX_CMD_FORWARDED: + /* Already forwarded by hardware */ + dsa_default_offload_fwd_mark(skb); + break; + case YT921X_TAG_RX_CMD_UNK_UCAST: + case YT921X_TAG_RX_CMD_UNK_MCAST: + /* NOTE: hardware doesn't distinguish between TRAP (copy to CPU + * only) and COPY (forward and copy to CPU). In order to perform + * a soft switch, NEVER use COPY action in the switch driver. + */ + break; + default: + dev_warn_ratelimited(&netdev->dev, + "Unexpected rx cmd 0x%02x\n", cmd); + break; + } + + /* Remove YT921x tag and update checksum */ + skb_pull_rcsum(skb, YT921X_TAG_LEN); + dsa_strip_etype_header(skb, YT921X_TAG_LEN); + + return skb; +} + +static const struct dsa_device_ops yt921x_netdev_ops = { + .name = YT921X_TAG_NAME, + .proto = DSA_TAG_PROTO_YT921X, + .xmit = yt921x_tag_xmit, + .rcv = yt921x_tag_rcv, + .needed_headroom = YT921X_TAG_LEN, +}; + +MODULE_DESCRIPTION("DSA tag driver for Motorcomm YT921x switches"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_YT921X, YT921X_TAG_NAME); + +module_dsa_tag_driver(yt921x_netdev_ops); From 186623f4aa724c46cbb4dbd5235cf6942215f5b5 Mon Sep 17 00:00:00 2001 From: David Yang Date: Fri, 17 Oct 2025 14:08:55 +0800 Subject: [PATCH 183/867] net: dsa: yt921x: Add support for Motorcomm YT921x Motorcomm YT921x is a series of ethernet switches developed by Shanghai Motorcomm Electronic Technology, including: - YT9215S / YT9215RB / YT9215SC: 5 GbE PHYs - YT9213NB / YT9214NB: 2 GbE PHYs - YT9218N / YT9218MB: 8 GbE PHYs and up to 2 GMACs. 
Driver verified on a stock wireless router with IPQ5018 + YT9215S. Signed-off-by: David Yang Link: https://patch.msgid.link/20251017060859.326450-4-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/Kconfig | 7 + drivers/net/dsa/Makefile | 1 + drivers/net/dsa/yt921x.c | 2891 ++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/yt921x.h | 504 +++++++ 4 files changed, 3403 insertions(+) create mode 100644 drivers/net/dsa/yt921x.c create mode 100644 drivers/net/dsa/yt921x.h diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 4d9af691b9896..7eb301fd987d1 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -154,4 +154,11 @@ config NET_DSA_VITESSE_VSC73XX_PLATFORM This enables support for the Vitesse VSC7385, VSC7388, VSC7395 and VSC7398 SparX integrated ethernet switches, connected over a CPU-attached address bus and work in memory-mapped I/O mode. + +config NET_DSA_YT921X + tristate "Motorcomm YT9215 ethernet switch chip support" + select NET_DSA_TAG_YT921X + help + This enables support for the Motorcomm YT9215 ethernet switch + chip. 
endmenu diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 0f8ff4a1a313b..16de4ba3fa388 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX) += vitesse-vsc73xx-core.o obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM) += vitesse-vsc73xx-platform.o obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_SPI) += vitesse-vsc73xx-spi.o +obj-$(CONFIG_NET_DSA_YT921X) += yt921x.o obj-y += b53/ obj-y += hirschmann/ obj-y += lantiq/ diff --git a/drivers/net/dsa/yt921x.c b/drivers/net/dsa/yt921x.c new file mode 100644 index 0000000000000..ab762ffc46611 --- /dev/null +++ b/drivers/net/dsa/yt921x.c @@ -0,0 +1,2891 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Motorcomm YT921x Switch + * + * Should work on YT9213/YT9214/YT9215/YT9218, but only tested on YT9215+SGMII, + * be sure to do your own checks before porting to another chip. + * + * Copyright (c) 2025 David Yang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "yt921x.h" + +struct yt921x_mib_desc { + unsigned int size; + unsigned int offset; + const char *name; +}; + +#define MIB_DESC(_size, _offset, _name) \ + {_size, _offset, _name} + +/* Must agree with yt921x_mib + * + * Unstructured fields (name != NULL) will appear in get_ethtool_stats(), + * structured go to their *_stats() methods, but we need their sizes and offsets + * to perform 32bit MIB overflow wraparound. 
+ */ +static const struct yt921x_mib_desc yt921x_mib_descs[] = { + MIB_DESC(1, 0x00, NULL), /* RxBroadcast */ + MIB_DESC(1, 0x04, NULL), /* RxPause */ + MIB_DESC(1, 0x08, NULL), /* RxMulticast */ + MIB_DESC(1, 0x0c, NULL), /* RxCrcErr */ + + MIB_DESC(1, 0x10, NULL), /* RxAlignErr */ + MIB_DESC(1, 0x14, NULL), /* RxUnderSizeErr */ + MIB_DESC(1, 0x18, NULL), /* RxFragErr */ + MIB_DESC(1, 0x1c, NULL), /* RxPktSz64 */ + + MIB_DESC(1, 0x20, NULL), /* RxPktSz65To127 */ + MIB_DESC(1, 0x24, NULL), /* RxPktSz128To255 */ + MIB_DESC(1, 0x28, NULL), /* RxPktSz256To511 */ + MIB_DESC(1, 0x2c, NULL), /* RxPktSz512To1023 */ + + MIB_DESC(1, 0x30, NULL), /* RxPktSz1024To1518 */ + MIB_DESC(1, 0x34, NULL), /* RxPktSz1519ToMax */ + MIB_DESC(2, 0x38, NULL), /* RxGoodBytes */ + /* 0x3c */ + + MIB_DESC(2, 0x40, "RxBadBytes"), + /* 0x44 */ + MIB_DESC(2, 0x48, NULL), /* RxOverSzErr */ + /* 0x4c */ + + MIB_DESC(1, 0x50, NULL), /* RxDropped */ + MIB_DESC(1, 0x54, NULL), /* TxBroadcast */ + MIB_DESC(1, 0x58, NULL), /* TxPause */ + MIB_DESC(1, 0x5c, NULL), /* TxMulticast */ + + MIB_DESC(1, 0x60, NULL), /* TxUnderSizeErr */ + MIB_DESC(1, 0x64, NULL), /* TxPktSz64 */ + MIB_DESC(1, 0x68, NULL), /* TxPktSz65To127 */ + MIB_DESC(1, 0x6c, NULL), /* TxPktSz128To255 */ + + MIB_DESC(1, 0x70, NULL), /* TxPktSz256To511 */ + MIB_DESC(1, 0x74, NULL), /* TxPktSz512To1023 */ + MIB_DESC(1, 0x78, NULL), /* TxPktSz1024To1518 */ + MIB_DESC(1, 0x7c, NULL), /* TxPktSz1519ToMax */ + + MIB_DESC(2, 0x80, NULL), /* TxGoodBytes */ + /* 0x84 */ + MIB_DESC(2, 0x88, NULL), /* TxCollision */ + /* 0x8c */ + + MIB_DESC(1, 0x90, NULL), /* TxExcessiveCollistion */ + MIB_DESC(1, 0x94, NULL), /* TxMultipleCollision */ + MIB_DESC(1, 0x98, NULL), /* TxSingleCollision */ + MIB_DESC(1, 0x9c, NULL), /* TxPkt */ + + MIB_DESC(1, 0xa0, NULL), /* TxDeferred */ + MIB_DESC(1, 0xa4, NULL), /* TxLateCollision */ + MIB_DESC(1, 0xa8, "RxOAM"), + MIB_DESC(1, 0xac, "TxOAM"), +}; + +struct yt921x_info { + const char *name; + u16 major; + /* 
Unknown, seems to be plain enumeration */ + u8 mode; + u8 extmode; + /* Ports with integral GbE PHYs, not including MCU Port 10 */ + u16 internal_mask; + /* TODO: see comments in yt921x_dsa_phylink_get_caps() */ + u16 external_mask; +}; + +#define YT921X_PORT_MASK_INTn(port) BIT(port) +#define YT921X_PORT_MASK_INT0_n(n) GENMASK((n) - 1, 0) +#define YT921X_PORT_MASK_EXT0 BIT(8) +#define YT921X_PORT_MASK_EXT1 BIT(9) + +static const struct yt921x_info yt921x_infos[] = { + { + "YT9215SC", YT9215_MAJOR, 1, 0, + YT921X_PORT_MASK_INT0_n(5), + YT921X_PORT_MASK_EXT0 | YT921X_PORT_MASK_EXT1, + }, + { + "YT9215S", YT9215_MAJOR, 2, 0, + YT921X_PORT_MASK_INT0_n(5), + YT921X_PORT_MASK_EXT0 | YT921X_PORT_MASK_EXT1, + }, + { + "YT9215RB", YT9215_MAJOR, 3, 0, + YT921X_PORT_MASK_INT0_n(5), + YT921X_PORT_MASK_EXT0 | YT921X_PORT_MASK_EXT1, + }, + { + "YT9214NB", YT9215_MAJOR, 3, 2, + YT921X_PORT_MASK_INTn(1) | YT921X_PORT_MASK_INTn(3), + YT921X_PORT_MASK_EXT0 | YT921X_PORT_MASK_EXT1, + }, + { + "YT9213NB", YT9215_MAJOR, 3, 3, + YT921X_PORT_MASK_INTn(1) | YT921X_PORT_MASK_INTn(3), + YT921X_PORT_MASK_EXT1, + }, + { + "YT9218N", YT9218_MAJOR, 0, 0, + YT921X_PORT_MASK_INT0_n(8), + 0, + }, + { + "YT9218MB", YT9218_MAJOR, 1, 0, + YT921X_PORT_MASK_INT0_n(8), + YT921X_PORT_MASK_EXT0 | YT921X_PORT_MASK_EXT1, + }, + {} +}; + +#define YT921X_NAME "yt921x" + +#define YT921X_VID_UNWARE 4095 + +#define YT921X_POLL_SLEEP_US 10000 +#define YT921X_POLL_TIMEOUT_US 100000 + +/* The interval should be small enough to avoid overflow of 32bit MIBs. + * + * Until we can read MIBs from stats64 call directly (i.e. sleep + * there), we have to poll stats more frequently then it is actually needed. + * For overflow protection, normally, 100 sec interval should have been OK. 
+ */ +#define YT921X_STATS_INTERVAL_JIFFIES (3 * HZ) + +struct yt921x_reg_mdio { + struct mii_bus *bus; + int addr; + /* SWITCH_ID_1 / SWITCH_ID_0 of the device + * + * This is a way to multiplex multiple devices on the same MII phyaddr + * and should be configurable in DT. However, MDIO core simply doesn't + * allow multiple devices over one reg addr, so this is a fixed value + * for now until a solution is found. + * + * Keep this because we need switchid to form MII regaddrs anyway. + */ + unsigned char switchid; +}; + +/* TODO: SPI/I2C */ + +#define to_yt921x_priv(_ds) container_of_const(_ds, struct yt921x_priv, ds) +#define to_device(priv) ((priv)->ds.dev) + +static int yt921x_reg_read(struct yt921x_priv *priv, u32 reg, u32 *valp) +{ + WARN_ON(!mutex_is_locked(&priv->reg_lock)); + + return priv->reg_ops->read(priv->reg_ctx, reg, valp); +} + +static int yt921x_reg_write(struct yt921x_priv *priv, u32 reg, u32 val) +{ + WARN_ON(!mutex_is_locked(&priv->reg_lock)); + + return priv->reg_ops->write(priv->reg_ctx, reg, val); +} + +static int +yt921x_reg_wait(struct yt921x_priv *priv, u32 reg, u32 mask, u32 *valp) +{ + u32 val; + int res; + int ret; + + ret = read_poll_timeout(yt921x_reg_read, res, + res || (val & mask) == *valp, + YT921X_POLL_SLEEP_US, YT921X_POLL_TIMEOUT_US, + false, priv, reg, &val); + if (ret) + return ret; + if (res) + return res; + + *valp = val; + return 0; +} + +static int +yt921x_reg_update_bits(struct yt921x_priv *priv, u32 reg, u32 mask, u32 val) +{ + int res; + u32 v; + u32 u; + + res = yt921x_reg_read(priv, reg, &v); + if (res) + return res; + + u = v; + u &= ~mask; + u |= val; + if (u == v) + return 0; + + return yt921x_reg_write(priv, reg, u); +} + +static int yt921x_reg_set_bits(struct yt921x_priv *priv, u32 reg, u32 mask) +{ + return yt921x_reg_update_bits(priv, reg, 0, mask); +} + +static int yt921x_reg_clear_bits(struct yt921x_priv *priv, u32 reg, u32 mask) +{ + return yt921x_reg_update_bits(priv, reg, mask, 0); +} + +static int 
+yt921x_reg_toggle_bits(struct yt921x_priv *priv, u32 reg, u32 mask, bool set) +{ + return yt921x_reg_update_bits(priv, reg, mask, !set ? 0 : mask); +} + +/* Some registers, like VLANn_CTRL, should always be written in 64-bit, even if + * you are to write only the lower / upper 32 bits. + * + * There is no such restriction for reading, but we still provide 64-bit read + * wrappers so that we always handle u64 values. + */ + +static int yt921x_reg64_read(struct yt921x_priv *priv, u32 reg, u64 *valp) +{ + u32 lo; + u32 hi; + int res; + + res = yt921x_reg_read(priv, reg, &lo); + if (res) + return res; + res = yt921x_reg_read(priv, reg + 4, &hi); + if (res) + return res; + + *valp = ((u64)hi << 32) | lo; + return 0; +} + +static int yt921x_reg64_write(struct yt921x_priv *priv, u32 reg, u64 val) +{ + int res; + + res = yt921x_reg_write(priv, reg, (u32)val); + if (res) + return res; + return yt921x_reg_write(priv, reg + 4, (u32)(val >> 32)); +} + +static int +yt921x_reg64_update_bits(struct yt921x_priv *priv, u32 reg, u64 mask, u64 val) +{ + int res; + u64 v; + u64 u; + + res = yt921x_reg64_read(priv, reg, &v); + if (res) + return res; + + u = v; + u &= ~mask; + u |= val; + if (u == v) + return 0; + + return yt921x_reg64_write(priv, reg, u); +} + +static int yt921x_reg64_clear_bits(struct yt921x_priv *priv, u32 reg, u64 mask) +{ + return yt921x_reg64_update_bits(priv, reg, mask, 0); +} + +static int yt921x_reg_mdio_read(void *context, u32 reg, u32 *valp) +{ + struct yt921x_reg_mdio *mdio = context; + struct mii_bus *bus = mdio->bus; + int addr = mdio->addr; + u32 reg_addr; + u32 reg_data; + u32 val; + int res; + + /* Hold the mdio bus lock to avoid (un)locking for 4 times */ + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); + + reg_addr = YT921X_SMI_SWITCHID(mdio->switchid) | YT921X_SMI_ADDR | + YT921X_SMI_READ; + res = __mdiobus_write(bus, addr, reg_addr, (u16)(reg >> 16)); + if (res) + goto end; + res = __mdiobus_write(bus, addr, reg_addr, (u16)reg); + if (res) 
+ goto end; + + reg_data = YT921X_SMI_SWITCHID(mdio->switchid) | YT921X_SMI_DATA | + YT921X_SMI_READ; + res = __mdiobus_read(bus, addr, reg_data); + if (res < 0) + goto end; + val = (u16)res; + res = __mdiobus_read(bus, addr, reg_data); + if (res < 0) + goto end; + val = (val << 16) | (u16)res; + + *valp = val; + res = 0; + +end: + mutex_unlock(&bus->mdio_lock); + return res; +} + +static int yt921x_reg_mdio_write(void *context, u32 reg, u32 val) +{ + struct yt921x_reg_mdio *mdio = context; + struct mii_bus *bus = mdio->bus; + int addr = mdio->addr; + u32 reg_addr; + u32 reg_data; + int res; + + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); + + reg_addr = YT921X_SMI_SWITCHID(mdio->switchid) | YT921X_SMI_ADDR | + YT921X_SMI_WRITE; + res = __mdiobus_write(bus, addr, reg_addr, (u16)(reg >> 16)); + if (res) + goto end; + res = __mdiobus_write(bus, addr, reg_addr, (u16)reg); + if (res) + goto end; + + reg_data = YT921X_SMI_SWITCHID(mdio->switchid) | YT921X_SMI_DATA | + YT921X_SMI_WRITE; + res = __mdiobus_write(bus, addr, reg_data, (u16)(val >> 16)); + if (res) + goto end; + res = __mdiobus_write(bus, addr, reg_data, (u16)val); + if (res) + goto end; + + res = 0; + +end: + mutex_unlock(&bus->mdio_lock); + return res; +} + +static const struct yt921x_reg_ops yt921x_reg_ops_mdio = { + .read = yt921x_reg_mdio_read, + .write = yt921x_reg_mdio_write, +}; + +/* TODO: SPI/I2C */ + +static int yt921x_intif_wait(struct yt921x_priv *priv) +{ + u32 val = 0; + + return yt921x_reg_wait(priv, YT921X_INT_MBUS_OP, YT921X_MBUS_OP_START, + &val); +} + +static int +yt921x_intif_read(struct yt921x_priv *priv, int port, int reg, u16 *valp) +{ + struct device *dev = to_device(priv); + u32 mask; + u32 ctrl; + u32 val; + int res; + + res = yt921x_intif_wait(priv); + if (res) + return res; + + mask = YT921X_MBUS_CTRL_PORT_M | YT921X_MBUS_CTRL_REG_M | + YT921X_MBUS_CTRL_OP_M; + ctrl = YT921X_MBUS_CTRL_PORT(port) | YT921X_MBUS_CTRL_REG(reg) | + YT921X_MBUS_CTRL_READ; + res = 
yt921x_reg_update_bits(priv, YT921X_INT_MBUS_CTRL, mask, ctrl); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_INT_MBUS_OP, YT921X_MBUS_OP_START); + if (res) + return res; + + res = yt921x_intif_wait(priv); + if (res) + return res; + res = yt921x_reg_read(priv, YT921X_INT_MBUS_DIN, &val); + if (res) + return res; + + if ((u16)val != val) + dev_info(dev, + "%s: port %d, reg 0x%x: Expected u16, got 0x%08x\n", + __func__, port, reg, val); + *valp = (u16)val; + return 0; +} + +static int +yt921x_intif_write(struct yt921x_priv *priv, int port, int reg, u16 val) +{ + u32 mask; + u32 ctrl; + int res; + + res = yt921x_intif_wait(priv); + if (res) + return res; + + mask = YT921X_MBUS_CTRL_PORT_M | YT921X_MBUS_CTRL_REG_M | + YT921X_MBUS_CTRL_OP_M; + ctrl = YT921X_MBUS_CTRL_PORT(port) | YT921X_MBUS_CTRL_REG(reg) | + YT921X_MBUS_CTRL_WRITE; + res = yt921x_reg_update_bits(priv, YT921X_INT_MBUS_CTRL, mask, ctrl); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_INT_MBUS_DOUT, val); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_INT_MBUS_OP, YT921X_MBUS_OP_START); + if (res) + return res; + + return yt921x_intif_wait(priv); +} + +static int yt921x_mbus_int_read(struct mii_bus *mbus, int port, int reg) +{ + struct yt921x_priv *priv = mbus->priv; + u16 val; + int res; + + if (port >= YT921X_PORT_NUM) + return U16_MAX; + + mutex_lock(&priv->reg_lock); + res = yt921x_intif_read(priv, port, reg, &val); + mutex_unlock(&priv->reg_lock); + + if (res) + return res; + return val; +} + +static int +yt921x_mbus_int_write(struct mii_bus *mbus, int port, int reg, u16 data) +{ + struct yt921x_priv *priv = mbus->priv; + int res; + + if (port >= YT921X_PORT_NUM) + return -ENODEV; + + mutex_lock(&priv->reg_lock); + res = yt921x_intif_write(priv, port, reg, data); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_mbus_int_init(struct yt921x_priv *priv, struct device_node *mnp) +{ + struct device *dev = to_device(priv); + struct 
mii_bus *mbus; + int res; + + mbus = devm_mdiobus_alloc(dev); + if (!mbus) + return -ENOMEM; + + mbus->name = "YT921x internal MDIO bus"; + snprintf(mbus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + mbus->priv = priv; + mbus->read = yt921x_mbus_int_read; + mbus->write = yt921x_mbus_int_write; + mbus->parent = dev; + mbus->phy_mask = (u32)~GENMASK(YT921X_PORT_NUM - 1, 0); + + res = devm_of_mdiobus_register(dev, mbus, mnp); + if (res) + return res; + + priv->mbus_int = mbus; + + return 0; +} + +static int yt921x_extif_wait(struct yt921x_priv *priv) +{ + u32 val = 0; + + return yt921x_reg_wait(priv, YT921X_EXT_MBUS_OP, YT921X_MBUS_OP_START, + &val); +} + +static int +yt921x_extif_read(struct yt921x_priv *priv, int port, int reg, u16 *valp) +{ + struct device *dev = to_device(priv); + u32 mask; + u32 ctrl; + u32 val; + int res; + + res = yt921x_extif_wait(priv); + if (res) + return res; + + mask = YT921X_MBUS_CTRL_PORT_M | YT921X_MBUS_CTRL_REG_M | + YT921X_MBUS_CTRL_TYPE_M | YT921X_MBUS_CTRL_OP_M; + ctrl = YT921X_MBUS_CTRL_PORT(port) | YT921X_MBUS_CTRL_REG(reg) | + YT921X_MBUS_CTRL_TYPE_C22 | YT921X_MBUS_CTRL_READ; + res = yt921x_reg_update_bits(priv, YT921X_EXT_MBUS_CTRL, mask, ctrl); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_EXT_MBUS_OP, YT921X_MBUS_OP_START); + if (res) + return res; + + res = yt921x_extif_wait(priv); + if (res) + return res; + res = yt921x_reg_read(priv, YT921X_EXT_MBUS_DIN, &val); + if (res) + return res; + + if ((u16)val != val) + dev_info(dev, + "%s: port %d, reg 0x%x: Expected u16, got 0x%08x\n", + __func__, port, reg, val); + *valp = (u16)val; + return 0; +} + +static int +yt921x_extif_write(struct yt921x_priv *priv, int port, int reg, u16 val) +{ + u32 mask; + u32 ctrl; + int res; + + res = yt921x_extif_wait(priv); + if (res) + return res; + + mask = YT921X_MBUS_CTRL_PORT_M | YT921X_MBUS_CTRL_REG_M | + YT921X_MBUS_CTRL_TYPE_M | YT921X_MBUS_CTRL_OP_M; + ctrl = YT921X_MBUS_CTRL_PORT(port) | YT921X_MBUS_CTRL_REG(reg) | + 
YT921X_MBUS_CTRL_TYPE_C22 | YT921X_MBUS_CTRL_WRITE; + res = yt921x_reg_update_bits(priv, YT921X_EXT_MBUS_CTRL, mask, ctrl); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_EXT_MBUS_DOUT, val); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_EXT_MBUS_OP, YT921X_MBUS_OP_START); + if (res) + return res; + + return yt921x_extif_wait(priv); +} + +static int yt921x_mbus_ext_read(struct mii_bus *mbus, int port, int reg) +{ + struct yt921x_priv *priv = mbus->priv; + u16 val; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_extif_read(priv, port, reg, &val); + mutex_unlock(&priv->reg_lock); + + if (res) + return res; + return val; +} + +static int +yt921x_mbus_ext_write(struct mii_bus *mbus, int port, int reg, u16 data) +{ + struct yt921x_priv *priv = mbus->priv; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_extif_write(priv, port, reg, data); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_mbus_ext_init(struct yt921x_priv *priv, struct device_node *mnp) +{ + struct device *dev = to_device(priv); + struct mii_bus *mbus; + int res; + + mbus = devm_mdiobus_alloc(dev); + if (!mbus) + return -ENOMEM; + + mbus->name = "YT921x external MDIO bus"; + snprintf(mbus->id, MII_BUS_ID_SIZE, "%s@ext", dev_name(dev)); + mbus->priv = priv; + /* TODO: c45? */ + mbus->read = yt921x_mbus_ext_read; + mbus->write = yt921x_mbus_ext_write; + mbus->parent = dev; + + res = devm_of_mdiobus_register(dev, mbus, mnp); + if (res) + return res; + + priv->mbus_ext = mbus; + + return 0; +} + +/* Read and handle overflow of 32bit MIBs. MIB buffer must be zeroed before. 
*/ +static int yt921x_read_mib(struct yt921x_priv *priv, int port) +{ + struct yt921x_port *pp = &priv->ports[port]; + struct device *dev = to_device(priv); + struct yt921x_mib *mib = &pp->mib; + int res = 0; + + /* Reading of yt921x_port::mib is not protected by a lock and it's vain + * to keep its consistency, since we have to read registers one by one + * and there is no way to make a snapshot of MIB stats. + * + * Writing (by this function only) is and should be protected by + * reg_lock. + */ + + for (size_t i = 0; i < ARRAY_SIZE(yt921x_mib_descs); i++) { + const struct yt921x_mib_desc *desc = &yt921x_mib_descs[i]; + u32 reg = YT921X_MIBn_DATA0(port) + desc->offset; + u64 *valp = &((u64 *)mib)[i]; + u64 val = *valp; + u32 val0; + u32 val1; + + res = yt921x_reg_read(priv, reg, &val0); + if (res) + break; + + if (desc->size <= 1) { + if (val0 < (u32)val) + /* hw counter is below the cached low word: it wrapped, carry into the high word */ + val += (u64)U32_MAX + 1; + val &= ~(u64)U32_MAX; + val |= val0; + } else { + res = yt921x_reg_read(priv, reg + 4, &val1); + if (res) + break; + val = ((u64)val0 << 32) | val1; + } + + WRITE_ONCE(*valp, val); + } + + pp->rx_frames = mib->rx_64byte + mib->rx_65_127byte + + mib->rx_128_255byte + mib->rx_256_511byte + + mib->rx_512_1023byte + mib->rx_1024_1518byte + + mib->rx_jumbo; + pp->tx_frames = mib->tx_64byte + mib->tx_65_127byte + + mib->tx_128_255byte + mib->tx_256_511byte + + mib->tx_512_1023byte + mib->tx_1024_1518byte + + mib->tx_jumbo; + + if (res) + dev_err(dev, "Failed to %s port %d: %i\n", "read stats for", + port, res); + return res; +} + +static void yt921x_poll_mib(struct work_struct *work) +{ + struct yt921x_port *pp = container_of_const(work, struct yt921x_port, + mib_read.work); + struct yt921x_priv *priv = (void *)(pp - pp->index) - + offsetof(struct yt921x_priv, ports); + unsigned long delay = YT921X_STATS_INTERVAL_JIFFIES; + int port = pp->index; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_read_mib(priv, port); + mutex_unlock(&priv->reg_lock); + if (res) + delay *=
4; + + schedule_delayed_work(&pp->mib_read, delay); +} + +static void +yt921x_dsa_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data) +{ + if (stringset != ETH_SS_STATS) + return; + + for (size_t i = 0; i < ARRAY_SIZE(yt921x_mib_descs); i++) { + const struct yt921x_mib_desc *desc = &yt921x_mib_descs[i]; + + if (desc->name) + ethtool_puts(&data, desc->name); + } +} + +static void +yt921x_dsa_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct yt921x_port *pp = &priv->ports[port]; + struct yt921x_mib *mib = &pp->mib; + size_t j; + + mutex_lock(&priv->reg_lock); + yt921x_read_mib(priv, port); + mutex_unlock(&priv->reg_lock); + + j = 0; + for (size_t i = 0; i < ARRAY_SIZE(yt921x_mib_descs); i++) { + const struct yt921x_mib_desc *desc = &yt921x_mib_descs[i]; + + if (!desc->name) + continue; + + data[j] = ((u64 *)mib)[i]; + j++; + } +} + +static int yt921x_dsa_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + int cnt = 0; + + if (sset != ETH_SS_STATS) + return 0; + + for (size_t i = 0; i < ARRAY_SIZE(yt921x_mib_descs); i++) { + const struct yt921x_mib_desc *desc = &yt921x_mib_descs[i]; + + if (desc->name) + cnt++; + } + + return cnt; +} + +static void +yt921x_dsa_get_eth_mac_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct yt921x_port *pp = &priv->ports[port]; + struct yt921x_mib *mib = &pp->mib; + + mutex_lock(&priv->reg_lock); + yt921x_read_mib(priv, port); + mutex_unlock(&priv->reg_lock); + + mac_stats->FramesTransmittedOK = pp->tx_frames; + mac_stats->SingleCollisionFrames = mib->tx_single_collisions; + mac_stats->MultipleCollisionFrames = mib->tx_multiple_collisions; + mac_stats->FramesReceivedOK = pp->rx_frames; + mac_stats->FrameCheckSequenceErrors = mib->rx_crc_errors; + mac_stats->AlignmentErrors = mib->rx_alignment_errors; + mac_stats->OctetsTransmittedOK = 
mib->tx_good_bytes; + mac_stats->FramesWithDeferredXmissions = mib->tx_deferred; + mac_stats->LateCollisions = mib->tx_late_collisions; + mac_stats->FramesAbortedDueToXSColls = mib->tx_aborted_errors; + /* mac_stats->FramesLostDueToIntMACXmitError */ + /* mac_stats->CarrierSenseErrors */ + mac_stats->OctetsReceivedOK = mib->rx_good_bytes; + /* mac_stats->FramesLostDueToIntMACRcvError */ + mac_stats->MulticastFramesXmittedOK = mib->tx_multicast; + mac_stats->BroadcastFramesXmittedOK = mib->tx_broadcast; + /* mac_stats->FramesWithExcessiveDeferral */ + mac_stats->MulticastFramesReceivedOK = mib->rx_multicast; + mac_stats->BroadcastFramesReceivedOK = mib->rx_broadcast; + /* mac_stats->InRangeLengthErrors */ + /* mac_stats->OutOfRangeLengthField */ + mac_stats->FrameTooLongErrors = mib->rx_oversize_errors; +} + +static void +yt921x_dsa_get_eth_ctrl_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct yt921x_port *pp = &priv->ports[port]; + struct yt921x_mib *mib = &pp->mib; + + mutex_lock(&priv->reg_lock); + yt921x_read_mib(priv, port); + mutex_unlock(&priv->reg_lock); + + ctrl_stats->MACControlFramesTransmitted = mib->tx_pause; + ctrl_stats->MACControlFramesReceived = mib->rx_pause; + /* ctrl_stats->UnsupportedOpcodesReceived */ +} + +static const struct ethtool_rmon_hist_range yt921x_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, YT921X_FRAME_SIZE_MAX }, + {} +}; + +static void +yt921x_dsa_get_rmon_stats(struct dsa_switch *ds, int port, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct yt921x_port *pp = &priv->ports[port]; + struct yt921x_mib *mib = &pp->mib; + + mutex_lock(&priv->reg_lock); + yt921x_read_mib(priv, port); + mutex_unlock(&priv->reg_lock); + + *ranges = yt921x_rmon_ranges; + + 
rmon_stats->undersize_pkts = mib->rx_undersize_errors; + rmon_stats->oversize_pkts = mib->rx_oversize_errors; + rmon_stats->fragments = mib->rx_alignment_errors; + /* rmon_stats->jabbers */ + + rmon_stats->hist[0] = mib->rx_64byte; + rmon_stats->hist[1] = mib->rx_65_127byte; + rmon_stats->hist[2] = mib->rx_128_255byte; + rmon_stats->hist[3] = mib->rx_256_511byte; + rmon_stats->hist[4] = mib->rx_512_1023byte; + rmon_stats->hist[5] = mib->rx_1024_1518byte; + rmon_stats->hist[6] = mib->rx_jumbo; + + rmon_stats->hist_tx[0] = mib->tx_64byte; + rmon_stats->hist_tx[1] = mib->tx_65_127byte; + rmon_stats->hist_tx[2] = mib->tx_128_255byte; + rmon_stats->hist_tx[3] = mib->tx_256_511byte; + rmon_stats->hist_tx[4] = mib->tx_512_1023byte; + rmon_stats->hist_tx[5] = mib->tx_1024_1518byte; + rmon_stats->hist_tx[6] = mib->tx_jumbo; +} + +static void +yt921x_dsa_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *stats) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct yt921x_port *pp = &priv->ports[port]; + struct yt921x_mib *mib = &pp->mib; + + stats->rx_length_errors = mib->rx_undersize_errors + + mib->rx_fragment_errors; + stats->rx_over_errors = mib->rx_oversize_errors; + stats->rx_crc_errors = mib->rx_crc_errors; + stats->rx_frame_errors = mib->rx_alignment_errors; + /* stats->rx_fifo_errors */ + /* stats->rx_missed_errors */ + + stats->tx_aborted_errors = mib->tx_aborted_errors; + /* stats->tx_carrier_errors */ + stats->tx_fifo_errors = mib->tx_undersize_errors; + /* stats->tx_heartbeat_errors */ + stats->tx_window_errors = mib->tx_late_collisions; + + stats->rx_packets = pp->rx_frames; + stats->tx_packets = pp->tx_frames; + stats->rx_bytes = mib->rx_good_bytes - ETH_FCS_LEN * stats->rx_packets; + stats->tx_bytes = mib->tx_good_bytes - ETH_FCS_LEN * stats->tx_packets; + stats->rx_errors = stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + 
stats->tx_fifo_errors + + stats->tx_window_errors; + stats->rx_dropped = mib->rx_dropped; + /* stats->tx_dropped */ + stats->multicast = mib->rx_multicast; + stats->collisions = mib->tx_collisions; +} + +static void +yt921x_dsa_get_pause_stats(struct dsa_switch *ds, int port, + struct ethtool_pause_stats *pause_stats) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct yt921x_port *pp = &priv->ports[port]; + struct yt921x_mib *mib = &pp->mib; + + mutex_lock(&priv->reg_lock); + yt921x_read_mib(priv, port); + mutex_unlock(&priv->reg_lock); + + pause_stats->tx_pause_frames = mib->tx_pause; + pause_stats->rx_pause_frames = mib->rx_pause; +} + +static int +yt921x_set_eee(struct yt921x_priv *priv, int port, struct ethtool_keee *e) +{ + /* Poor datasheet for EEE operations; don't ask if you are confused */ + + bool enable = e->eee_enabled; + u16 new_mask; + int res; + + /* Enable / disable global EEE */ + new_mask = priv->eee_ports_mask; + new_mask &= ~BIT(port); + new_mask |= !enable ? 
0 : BIT(port); + + if (!!new_mask != !!priv->eee_ports_mask) { + res = yt921x_reg_toggle_bits(priv, YT921X_PON_STRAP_FUNC, + YT921X_PON_STRAP_EEE, !!new_mask); + if (res) + return res; + res = yt921x_reg_toggle_bits(priv, YT921X_PON_STRAP_VAL, + YT921X_PON_STRAP_EEE, !!new_mask); + if (res) + return res; + } + + priv->eee_ports_mask = new_mask; + + /* Enable / disable port EEE */ + res = yt921x_reg_toggle_bits(priv, YT921X_EEE_CTRL, + YT921X_EEE_CTRL_ENn(port), enable); + if (res) + return res; + res = yt921x_reg_toggle_bits(priv, YT921X_EEEn_VAL(port), + YT921X_EEE_VAL_DATA, enable); + if (res) + return res; + + return 0; +} + +static int +yt921x_dsa_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_set_eee(priv, port, e); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +{ + /* Only serves as packet filter, since the frame size is always set to + * maximum after reset + */ + + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct dsa_port *dp = dsa_to_port(ds, port); + int frame_size; + int res; + + frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN; + if (dsa_port_is_cpu(dp)) + frame_size += YT921X_TAG_LEN; + + mutex_lock(&priv->reg_lock); + res = yt921x_reg_update_bits(priv, YT921X_MACn_FRAME(port), + YT921X_MAC_FRAME_SIZE_M, + YT921X_MAC_FRAME_SIZE(frame_size)); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int yt921x_dsa_port_max_mtu(struct dsa_switch *ds, int port) +{ + /* Only called for user ports, exclude tag len here */ + return YT921X_FRAME_SIZE_MAX - ETH_HLEN - ETH_FCS_LEN - YT921X_TAG_LEN; +} + +static int +yt921x_mirror_del(struct yt921x_priv *priv, int port, bool ingress) +{ + u32 mask; + + if (ingress) + mask = YT921X_MIRROR_IGR_PORTn(port); + else + mask = YT921X_MIRROR_EGR_PORTn(port); + return 
yt921x_reg_clear_bits(priv, YT921X_MIRROR, mask); +} + +static int +yt921x_mirror_add(struct yt921x_priv *priv, int port, bool ingress, + int to_local_port, struct netlink_ext_ack *extack) +{ + u32 srcs; + u32 ctrl; + u32 val; + u32 dst; + int res; + + if (ingress) + srcs = YT921X_MIRROR_IGR_PORTn(port); + else + srcs = YT921X_MIRROR_EGR_PORTn(port); + dst = YT921X_MIRROR_PORT(to_local_port); + + res = yt921x_reg_read(priv, YT921X_MIRROR, &val); + if (res) + return res; + + /* other mirror tasks & different dst port -> conflict */ + if ((val & ~srcs & (YT921X_MIRROR_EGR_PORTS_M | + YT921X_MIRROR_IGR_PORTS_M)) && + (val & YT921X_MIRROR_PORT_M) != dst) { + NL_SET_ERR_MSG_MOD(extack, + "Sniffer port is already configured, delete existing rules & retry"); + return -EBUSY; + } + + ctrl = val & ~YT921X_MIRROR_PORT_M; + ctrl |= srcs; + ctrl |= dst; + + if (ctrl == val) + return 0; + + return yt921x_reg_write(priv, YT921X_MIRROR, ctrl); +} + +static void +yt921x_dsa_port_mirror_del(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct device *dev = to_device(priv); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_mirror_del(priv, port, mirror->ingress); + mutex_unlock(&priv->reg_lock); + + if (res) + dev_err(dev, "Failed to %s port %d: %i\n", "unmirror", + port, res); +} + +static int +yt921x_dsa_port_mirror_add(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_mirror_add(priv, port, ingress, + mirror->to_local_port, extack); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int yt921x_fdb_wait(struct yt921x_priv *priv, u32 *valp) +{ + struct device *dev = to_device(priv); + u32 val = YT921X_FDB_RESULT_DONE; + int res; + + res = yt921x_reg_wait(priv, YT921X_FDB_RESULT, 
YT921X_FDB_RESULT_DONE, + &val); + if (res) { + dev_err(dev, "FDB probably stucked\n"); + return res; + } + + *valp = val; + return 0; +} + +static int +yt921x_fdb_in01(struct yt921x_priv *priv, const unsigned char *addr, + u16 vid, u32 ctrl1) +{ + u32 ctrl; + int res; + + ctrl = (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3]; + res = yt921x_reg_write(priv, YT921X_FDB_IN0, ctrl); + if (res) + return res; + + ctrl = ctrl1 | YT921X_FDB_IO1_FID(vid) | (addr[4] << 8) | addr[5]; + return yt921x_reg_write(priv, YT921X_FDB_IN1, ctrl); +} + +static int +yt921x_fdb_has(struct yt921x_priv *priv, const unsigned char *addr, u16 vid, + u16 *indexp) +{ + u32 ctrl; + u32 val; + int res; + + res = yt921x_fdb_in01(priv, addr, vid, 0); + if (res) + return res; + + ctrl = 0; + res = yt921x_reg_write(priv, YT921X_FDB_IN2, ctrl); + if (res) + return res; + + ctrl = YT921X_FDB_OP_OP_GET_ONE | YT921X_FDB_OP_START; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + + res = yt921x_fdb_wait(priv, &val); + if (res) + return res; + if (val & YT921X_FDB_RESULT_NOTFOUND) { + *indexp = YT921X_FDB_NUM; + return 0; + } + + *indexp = FIELD_GET(YT921X_FDB_RESULT_INDEX_M, val); + return 0; +} + +static int +yt921x_fdb_read(struct yt921x_priv *priv, unsigned char *addr, u16 *vidp, + u16 *ports_maskp, u16 *indexp, u8 *statusp) +{ + struct device *dev = to_device(priv); + u16 index; + u32 data0; + u32 data1; + u32 data2; + u32 val; + int res; + + res = yt921x_fdb_wait(priv, &val); + if (res) + return res; + if (val & YT921X_FDB_RESULT_NOTFOUND) { + *ports_maskp = 0; + return 0; + } + index = FIELD_GET(YT921X_FDB_RESULT_INDEX_M, val); + + res = yt921x_reg_read(priv, YT921X_FDB_OUT1, &data1); + if (res) + return res; + if ((data1 & YT921X_FDB_IO1_STATUS_M) == + YT921X_FDB_IO1_STATUS_INVALID) { + *ports_maskp = 0; + return 0; + } + + res = yt921x_reg_read(priv, YT921X_FDB_OUT0, &data0); + if (res) + return res; + res = yt921x_reg_read(priv, YT921X_FDB_OUT2, 
&data2); + if (res) + return res; + + addr[0] = data0 >> 24; + addr[1] = data0 >> 16; + addr[2] = data0 >> 8; + addr[3] = data0; + addr[4] = data1 >> 8; + addr[5] = data1; + *vidp = FIELD_GET(YT921X_FDB_IO1_FID_M, data1); + *indexp = index; + *ports_maskp = FIELD_GET(YT921X_FDB_IO2_EGR_PORTS_M, data2); + *statusp = FIELD_GET(YT921X_FDB_IO1_STATUS_M, data1); + + dev_dbg(dev, + "%s: index 0x%x, mac %02x:%02x:%02x:%02x:%02x:%02x, vid %d, ports 0x%x, status %d\n", + __func__, *indexp, addr[0], addr[1], addr[2], addr[3], + addr[4], addr[5], *vidp, *ports_maskp, *statusp); + return 0; +} + +static int +yt921x_fdb_dump(struct yt921x_priv *priv, u16 ports_mask, + dsa_fdb_dump_cb_t *cb, void *data) +{ + unsigned char addr[ETH_ALEN]; + u8 status; + u16 pmask; + u16 index; + u32 ctrl; + u16 vid; + int res; + + ctrl = YT921X_FDB_OP_INDEX(0) | YT921X_FDB_OP_MODE_INDEX | + YT921X_FDB_OP_OP_GET_ONE | YT921X_FDB_OP_START; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + res = yt921x_fdb_read(priv, addr, &vid, &pmask, &index, &status); + if (res) + return res; + if ((pmask & ports_mask) && !is_multicast_ether_addr(addr)) { + res = cb(addr, vid, + status == YT921X_FDB_ENTRY_STATUS_STATIC, data); + if (res) + return res; + } + + ctrl = YT921X_FDB_IO2_EGR_PORTS(ports_mask); + res = yt921x_reg_write(priv, YT921X_FDB_IN2, ctrl); + if (res) + return res; + + index = 0; + do { + ctrl = YT921X_FDB_OP_INDEX(index) | YT921X_FDB_OP_MODE_INDEX | + YT921X_FDB_OP_NEXT_TYPE_UCAST_PORT | + YT921X_FDB_OP_OP_GET_NEXT | YT921X_FDB_OP_START; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + + res = yt921x_fdb_read(priv, addr, &vid, &pmask, &index, + &status); + if (res) + return res; + if (!pmask) + break; + + if ((pmask & ports_mask) && !is_multicast_ether_addr(addr)) { + res = cb(addr, vid, + status == YT921X_FDB_ENTRY_STATUS_STATIC, + data); + if (res) + return res; + } + + /* Never call GET_NEXT with 4095, otherwise it will hang + * 
forever until a reset! + */ + } while (index < YT921X_FDB_NUM - 1); + + return 0; +} + +static int +yt921x_fdb_flush_raw(struct yt921x_priv *priv, u16 ports_mask, u16 vid, + bool flush_static) +{ + u32 ctrl; + u32 val; + int res; + + if (vid < 4096) { + ctrl = YT921X_FDB_IO1_FID(vid); + res = yt921x_reg_write(priv, YT921X_FDB_IN1, ctrl); + if (res) + return res; + } + + ctrl = YT921X_FDB_IO2_EGR_PORTS(ports_mask); + res = yt921x_reg_write(priv, YT921X_FDB_IN2, ctrl); + if (res) + return res; + + ctrl = YT921X_FDB_OP_OP_FLUSH | YT921X_FDB_OP_START; + if (vid >= 4096) + ctrl |= YT921X_FDB_OP_FLUSH_PORT; + else + ctrl |= YT921X_FDB_OP_FLUSH_PORT_VID; + if (flush_static) + ctrl |= YT921X_FDB_OP_FLUSH_STATIC; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + + res = yt921x_fdb_wait(priv, &val); + if (res) + return res; + + return 0; +} + +static int +yt921x_fdb_flush_port(struct yt921x_priv *priv, int port, bool flush_static) +{ + return yt921x_fdb_flush_raw(priv, BIT(port), 4096, flush_static); +} + +static int +yt921x_fdb_add_index_in12(struct yt921x_priv *priv, u16 index, u16 ctrl1, + u16 ctrl2) +{ + u32 ctrl; + u32 val; + int res; + + res = yt921x_reg_write(priv, YT921X_FDB_IN1, ctrl1); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_FDB_IN2, ctrl2); + if (res) + return res; + + ctrl = YT921X_FDB_OP_INDEX(index) | YT921X_FDB_OP_MODE_INDEX | + YT921X_FDB_OP_OP_ADD | YT921X_FDB_OP_START; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + + return yt921x_fdb_wait(priv, &val); +} + +static int +yt921x_fdb_add(struct yt921x_priv *priv, const unsigned char *addr, u16 vid, + u16 ports_mask) +{ + u32 ctrl; + u32 val; + int res; + + ctrl = YT921X_FDB_IO1_STATUS_STATIC; + res = yt921x_fdb_in01(priv, addr, vid, ctrl); + if (res) + return res; + + ctrl = YT921X_FDB_IO2_EGR_PORTS(ports_mask); + res = yt921x_reg_write(priv, YT921X_FDB_IN2, ctrl); + if (res) + return res; + + ctrl = YT921X_FDB_OP_OP_ADD | 
YT921X_FDB_OP_START; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + + return yt921x_fdb_wait(priv, &val); +} + +static int +yt921x_fdb_leave(struct yt921x_priv *priv, const unsigned char *addr, + u16 vid, u16 ports_mask) +{ + u16 index; + u32 ctrl1; + u32 ctrl2; + u32 ctrl; + u32 val2; + u32 val; + int res; + + /* Check for presence */ + res = yt921x_fdb_has(priv, addr, vid, &index); + if (res) + return res; + if (index >= YT921X_FDB_NUM) + return 0; + + /* Check if action required */ + res = yt921x_reg_read(priv, YT921X_FDB_OUT2, &val2); + if (res) + return res; + + ctrl2 = val2 & ~YT921X_FDB_IO2_EGR_PORTS(ports_mask); + if (ctrl2 == val2) + return 0; + if (!(ctrl2 & YT921X_FDB_IO2_EGR_PORTS_M)) { + ctrl = YT921X_FDB_OP_OP_DEL | YT921X_FDB_OP_START; + res = yt921x_reg_write(priv, YT921X_FDB_OP, ctrl); + if (res) + return res; + + return yt921x_fdb_wait(priv, &val); + } + + res = yt921x_reg_read(priv, YT921X_FDB_OUT1, &ctrl1); + if (res) + return res; + + return yt921x_fdb_add_index_in12(priv, index, ctrl1, ctrl2); +} + +static int +yt921x_fdb_join(struct yt921x_priv *priv, const unsigned char *addr, u16 vid, + u16 ports_mask) +{ + u16 index; + u32 ctrl1; + u32 ctrl2; + u32 val1; + u32 val2; + int res; + + /* Check for presence */ + res = yt921x_fdb_has(priv, addr, vid, &index); + if (res) + return res; + if (index >= YT921X_FDB_NUM) + return yt921x_fdb_add(priv, addr, vid, ports_mask); + + /* Check if action required */ + res = yt921x_reg_read(priv, YT921X_FDB_OUT1, &val1); + if (res) + return res; + res = yt921x_reg_read(priv, YT921X_FDB_OUT2, &val2); + if (res) + return res; + + ctrl1 = val1 & ~YT921X_FDB_IO1_STATUS_M; + ctrl1 |= YT921X_FDB_IO1_STATUS_STATIC; + ctrl2 = val2 | YT921X_FDB_IO2_EGR_PORTS(ports_mask); + if (ctrl1 == val1 && ctrl2 == val2) + return 0; + + return yt921x_fdb_add_index_in12(priv, index, ctrl1, ctrl2); +} + +static int +yt921x_dsa_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void 
*data) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + mutex_lock(&priv->reg_lock); + /* Hardware FDB is shared for fdb and mdb, "bridge fdb show" + * only wants to see unicast + */ + res = yt921x_fdb_dump(priv, BIT(port), cb, data); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static void yt921x_dsa_port_fast_age(struct dsa_switch *ds, int port) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct device *dev = to_device(priv); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_fdb_flush_port(priv, port, false); + mutex_unlock(&priv->reg_lock); + + if (res) + dev_err(dev, "Failed to %s port %d: %i\n", "clear FDB for", + port, res); +} + +static int +yt921x_dsa_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + u32 ctrl; + int res; + + /* AGEING reg is set in 5s step */ + ctrl = clamp(msecs / 5000, 1, U16_MAX); + + mutex_lock(&priv->reg_lock); + res = yt921x_reg_write(priv, YT921X_AGEING, ctrl); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, struct dsa_db db) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_fdb_leave(priv, addr, vid, BIT(port)); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, struct dsa_db db) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_fdb_join(priv, addr, vid, BIT(port)); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + const unsigned char *addr = mdb->addr; + u16 vid = mdb->vid; + int res; + + 
mutex_lock(&priv->reg_lock); + res = yt921x_fdb_leave(priv, addr, vid, BIT(port)); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + const unsigned char *addr = mdb->addr; + u16 vid = mdb->vid; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_fdb_join(priv, addr, vid, BIT(port)); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_port_set_pvid(struct yt921x_priv *priv, int port, u16 vid) +{ + u32 mask; + u32 ctrl; + + mask = YT921X_PORT_VLAN_CTRL_CVID_M; + ctrl = YT921X_PORT_VLAN_CTRL_CVID(vid); + return yt921x_reg_update_bits(priv, YT921X_PORTn_VLAN_CTRL(port), + mask, ctrl); +} + +static int +yt921x_vlan_filtering(struct yt921x_priv *priv, int port, bool vlan_filtering) +{ + struct dsa_port *dp = dsa_to_port(&priv->ds, port); + struct net_device *bdev; + u16 pvid; + u32 mask; + u32 ctrl; + int res; + + bdev = dsa_port_bridge_dev_get(dp); + + if (!bdev || !vlan_filtering) + pvid = YT921X_VID_UNWARE; + else + br_vlan_get_pvid(bdev, &pvid); + res = yt921x_port_set_pvid(priv, port, pvid); + if (res) + return res; + + mask = YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_TAGGED | + YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_UNTAGGED; + ctrl = 0; + /* Do not drop tagged frames here; let VLAN_IGR_FILTER do it */ + if (vlan_filtering && !pvid) + ctrl |= YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_UNTAGGED; + res = yt921x_reg_update_bits(priv, YT921X_PORTn_VLAN_CTRL1(port), + mask, ctrl); + if (res) + return res; + + res = yt921x_reg_toggle_bits(priv, YT921X_VLAN_IGR_FILTER, + YT921X_VLAN_IGR_FILTER_PORTn(port), + vlan_filtering); + if (res) + return res; + + /* Turn on / off VLAN awareness */ + mask = YT921X_PORT_IGR_TPIDn_CTAG_M; + if (!vlan_filtering) + ctrl = 0; + else + ctrl = YT921X_PORT_IGR_TPIDn_CTAG(0); + res = yt921x_reg_update_bits(priv, YT921X_PORTn_IGR_TPID(port), + 
mask, ctrl); + if (res) + return res; + + return 0; +} + +static int +yt921x_vlan_del(struct yt921x_priv *priv, int port, u16 vid) +{ + u64 mask64; + + mask64 = YT921X_VLAN_CTRL_PORTS(port) | + YT921X_VLAN_CTRL_UNTAG_PORTn(port); + + return yt921x_reg64_clear_bits(priv, YT921X_VLANn_CTRL(vid), mask64); +} + +static int +yt921x_vlan_add(struct yt921x_priv *priv, int port, u16 vid, bool untagged) +{ + u64 mask64; + u64 ctrl64; + + mask64 = YT921X_VLAN_CTRL_PORTn(port) | + YT921X_VLAN_CTRL_PORTS(priv->cpu_ports_mask); + ctrl64 = mask64; + + mask64 |= YT921X_VLAN_CTRL_UNTAG_PORTn(port); + if (untagged) + ctrl64 |= YT921X_VLAN_CTRL_UNTAG_PORTn(port); + + return yt921x_reg64_update_bits(priv, YT921X_VLANn_CTRL(vid), + mask64, ctrl64); +} + +static int +yt921x_pvid_clear(struct yt921x_priv *priv, int port) +{ + struct dsa_port *dp = dsa_to_port(&priv->ds, port); + bool vlan_filtering; + u32 mask; + int res; + + vlan_filtering = dsa_port_is_vlan_filtering(dp); + + res = yt921x_port_set_pvid(priv, port, + vlan_filtering ? 
0 : YT921X_VID_UNWARE); + if (res) + return res; + + if (vlan_filtering) { + mask = YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_UNTAGGED; + res = yt921x_reg_set_bits(priv, YT921X_PORTn_VLAN_CTRL1(port), + mask); + if (res) + return res; + } + + return 0; +} + +static int +yt921x_pvid_set(struct yt921x_priv *priv, int port, u16 vid) +{ + struct dsa_port *dp = dsa_to_port(&priv->ds, port); + bool vlan_filtering; + u32 mask; + int res; + + vlan_filtering = dsa_port_is_vlan_filtering(dp); + + if (vlan_filtering) { + res = yt921x_port_set_pvid(priv, port, vid); + if (res) + return res; + } + + mask = YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_UNTAGGED; + res = yt921x_reg_clear_bits(priv, YT921X_PORTn_VLAN_CTRL1(port), mask); + if (res) + return res; + + return 0; +} + +static int +yt921x_dsa_port_vlan_filtering(struct dsa_switch *ds, int port, + bool vlan_filtering, + struct netlink_ext_ack *extack) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + if (dsa_is_cpu_port(ds, port)) + return 0; + + mutex_lock(&priv->reg_lock); + res = yt921x_vlan_filtering(priv, port, vlan_filtering); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + u16 vid = vlan->vid; + u16 pvid; + int res; + + if (dsa_is_cpu_port(ds, port)) + return 0; + + mutex_lock(&priv->reg_lock); + do { + struct dsa_port *dp = dsa_to_port(ds, port); + struct net_device *bdev; + + res = yt921x_vlan_del(priv, port, vid); + if (res) + break; + + bdev = dsa_port_bridge_dev_get(dp); + if (bdev) { + br_vlan_get_pvid(bdev, &pvid); + if (pvid == vid) + res = yt921x_pvid_clear(priv, port); + } + } while (0); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int +yt921x_dsa_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + struct yt921x_priv *priv = 
to_yt921x_priv(ds); + u16 vid = vlan->vid; + u16 pvid; + int res; + + /* CPU port is supposed to be a member of every VLAN; see + * yt921x_vlan_add() and yt921x_port_setup() + */ + if (dsa_is_cpu_port(ds, port)) + return 0; + + mutex_lock(&priv->reg_lock); + do { + struct dsa_port *dp = dsa_to_port(ds, port); + struct net_device *bdev; + + res = yt921x_vlan_add(priv, port, vid, + vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED); + if (res) + break; + + bdev = dsa_port_bridge_dev_get(dp); + if (bdev) { + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { + res = yt921x_pvid_set(priv, port, vid); + } else { + br_vlan_get_pvid(bdev, &pvid); + if (pvid == vid) + res = yt921x_pvid_clear(priv, port); + } + } + } while (0); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int yt921x_userport_standalone(struct yt921x_priv *priv, int port) +{ + u32 mask; + u32 ctrl; + int res; + + ctrl = ~priv->cpu_ports_mask; + res = yt921x_reg_write(priv, YT921X_PORTn_ISOLATION(port), ctrl); + if (res) + return res; + + /* Turn off FDB learning to prevent FDB pollution */ + mask = YT921X_PORT_LEARN_DIS; + res = yt921x_reg_set_bits(priv, YT921X_PORTn_LEARN(port), mask); + if (res) + return res; + + /* Turn off VLAN awareness */ + mask = YT921X_PORT_IGR_TPIDn_CTAG_M; + res = yt921x_reg_clear_bits(priv, YT921X_PORTn_IGR_TPID(port), mask); + if (res) + return res; + + /* Unrelated since learning is off and all packets are trapped; + * set it anyway + */ + res = yt921x_port_set_pvid(priv, port, YT921X_VID_UNWARE); + if (res) + return res; + + return 0; +} + +static int yt921x_userport_bridge(struct yt921x_priv *priv, int port) +{ + u32 mask; + int res; + + mask = YT921X_PORT_LEARN_DIS; + res = yt921x_reg_clear_bits(priv, YT921X_PORTn_LEARN(port), mask); + if (res) + return res; + + return 0; +} + +static int yt921x_isolate(struct yt921x_priv *priv, int port) +{ + u32 mask; + int res; + + mask = BIT(port); + for (int i = 0; i < YT921X_PORT_NUM; i++) { + if ((BIT(i) & priv->cpu_ports_mask) || i == 
port) + continue; + + res = yt921x_reg_set_bits(priv, YT921X_PORTn_ISOLATION(i), + mask); + if (res) + return res; + } + + return 0; +} + +/* Make sure to include the CPU port in ports_mask, or your bridge will + * not have it. + */ +static int yt921x_bridge(struct yt921x_priv *priv, u16 ports_mask) +{ + unsigned long targets_mask = ports_mask & ~priv->cpu_ports_mask; + u32 isolated_mask; + u32 ctrl; + int port; + int res; + + isolated_mask = 0; + for_each_set_bit(port, &targets_mask, YT921X_PORT_NUM) { + struct yt921x_port *pp = &priv->ports[port]; + + if (pp->isolated) + isolated_mask |= BIT(port); + } + + /* Block from non-cpu bridge ports ... */ + for_each_set_bit(port, &targets_mask, YT921X_PORT_NUM) { + struct yt921x_port *pp = &priv->ports[port]; + + /* to non-bridge ports */ + ctrl = ~ports_mask; + /* to isolated ports when isolated */ + if (pp->isolated) + ctrl |= isolated_mask; + /* to itself when non-hairpin */ + if (!pp->hairpin) + ctrl |= BIT(port); + else + ctrl &= ~BIT(port); + + res = yt921x_reg_write(priv, YT921X_PORTn_ISOLATION(port), + ctrl); + if (res) + return res; + } + + return 0; +} + +static int yt921x_bridge_leave(struct yt921x_priv *priv, int port) +{ + int res; + + res = yt921x_userport_standalone(priv, port); + if (res) + return res; + + res = yt921x_isolate(priv, port); + if (res) + return res; + + return 0; +} + +static int +yt921x_bridge_join(struct yt921x_priv *priv, int port, u16 ports_mask) +{ + int res; + + res = yt921x_userport_bridge(priv, port); + if (res) + return res; + + res = yt921x_bridge(priv, ports_mask); + if (res) + return res; + + return 0; +} + +static u32 +dsa_bridge_ports(struct dsa_switch *ds, const struct net_device *bdev) +{ + struct dsa_port *dp; + u32 mask = 0; + + dsa_switch_for_each_user_port(dp, ds) + if (dsa_port_offloads_bridge_dev(dp, bdev)) + mask |= BIT(dp->index); + + return mask; +} + +static int +yt921x_bridge_flags(struct yt921x_priv *priv, int port, + struct switchdev_brport_flags flags) +{ + 
struct yt921x_port *pp = &priv->ports[port]; + bool do_flush; + u32 mask; + int res; + + if (flags.mask & BR_LEARNING) { + bool learning = flags.val & BR_LEARNING; + + mask = YT921X_PORT_LEARN_DIS; + res = yt921x_reg_toggle_bits(priv, YT921X_PORTn_LEARN(port), + mask, !learning); + if (res) + return res; + } + + /* BR_FLOOD, BR_MCAST_FLOOD: see the comment where ACT_UNK_ACTn_TRAP + * is set + */ + + /* BR_BCAST_FLOOD: we can filter bcast, but cannot trap them */ + + do_flush = false; + if (flags.mask & BR_HAIRPIN_MODE) { + pp->hairpin = flags.val & BR_HAIRPIN_MODE; + do_flush = true; + } + if (flags.mask & BR_ISOLATED) { + pp->isolated = flags.val & BR_ISOLATED; + do_flush = true; + } + if (do_flush) { + struct dsa_switch *ds = &priv->ds; + struct dsa_port *dp = dsa_to_port(ds, port); + struct net_device *bdev; + + bdev = dsa_port_bridge_dev_get(dp); + if (bdev) { + u32 ports_mask; + + ports_mask = dsa_bridge_ports(ds, bdev); + ports_mask |= priv->cpu_ports_mask; + res = yt921x_bridge(priv, ports_mask); + if (res) + return res; + } + } + + return 0; +} + +static int +yt921x_dsa_port_pre_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + if (flags.mask & ~(BR_HAIRPIN_MODE | BR_LEARNING | BR_FLOOD | + BR_MCAST_FLOOD | BR_ISOLATED)) + return -EINVAL; + return 0; +} + +static int +yt921x_dsa_port_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + if (dsa_is_cpu_port(ds, port)) + return 0; + + mutex_lock(&priv->reg_lock); + res = yt921x_bridge_flags(priv, port, flags); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static void +yt921x_dsa_port_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct device *dev = to_device(priv); + int res; + + if (dsa_is_cpu_port(ds, port)) + 
return; + + mutex_lock(&priv->reg_lock); + res = yt921x_bridge_leave(priv, port); + mutex_unlock(&priv->reg_lock); + + if (res) + dev_err(dev, "Failed to %s port %d: %i\n", "unbridge", + port, res); +} + +static int +yt921x_dsa_port_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + u16 ports_mask; + int res; + + if (dsa_is_cpu_port(ds, port)) + return 0; + + ports_mask = dsa_bridge_ports(ds, bridge.dev); + ports_mask |= priv->cpu_ports_mask; + + mutex_lock(&priv->reg_lock); + res = yt921x_bridge_join(priv, port, ports_mask); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int yt921x_port_down(struct yt921x_priv *priv, int port) +{ + u32 mask; + int res; + + mask = YT921X_PORT_LINK | YT921X_PORT_RX_MAC_EN | YT921X_PORT_TX_MAC_EN; + res = yt921x_reg_clear_bits(priv, YT921X_PORTn_CTRL(port), mask); + if (res) + return res; + + if (yt921x_port_is_external(port)) { + mask = YT921X_SERDES_LINK; + res = yt921x_reg_clear_bits(priv, YT921X_SERDESn(port), mask); + if (res) + return res; + + mask = YT921X_XMII_LINK; + res = yt921x_reg_clear_bits(priv, YT921X_XMIIn(port), mask); + if (res) + return res; + } + + return 0; +} + +static int +yt921x_port_up(struct yt921x_priv *priv, int port, unsigned int mode, + phy_interface_t interface, int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + u32 mask; + u32 ctrl; + int res; + + switch (speed) { + case SPEED_10: + ctrl = YT921X_PORT_SPEED_10; + break; + case SPEED_100: + ctrl = YT921X_PORT_SPEED_100; + break; + case SPEED_1000: + ctrl = YT921X_PORT_SPEED_1000; + break; + case SPEED_2500: + ctrl = YT921X_PORT_SPEED_2500; + break; + case SPEED_10000: + ctrl = YT921X_PORT_SPEED_10000; + break; + default: + return -EINVAL; + } + if (duplex == DUPLEX_FULL) + ctrl |= YT921X_PORT_DUPLEX_FULL; + if (tx_pause) + ctrl |= YT921X_PORT_TX_PAUSE; + if (rx_pause) + ctrl |= YT921X_PORT_RX_PAUSE; 
+ ctrl |= YT921X_PORT_RX_MAC_EN | YT921X_PORT_TX_MAC_EN; + res = yt921x_reg_write(priv, YT921X_PORTn_CTRL(port), ctrl); + if (res) + return res; + + if (yt921x_port_is_external(port)) { + mask = YT921X_SERDES_SPEED_M; + switch (speed) { + case SPEED_10: + ctrl = YT921X_SERDES_SPEED_10; + break; + case SPEED_100: + ctrl = YT921X_SERDES_SPEED_100; + break; + case SPEED_1000: + ctrl = YT921X_SERDES_SPEED_1000; + break; + case SPEED_2500: + ctrl = YT921X_SERDES_SPEED_2500; + break; + case SPEED_10000: + ctrl = YT921X_SERDES_SPEED_10000; + break; + default: + return -EINVAL; + } + mask |= YT921X_SERDES_DUPLEX_FULL; + if (duplex == DUPLEX_FULL) + ctrl |= YT921X_SERDES_DUPLEX_FULL; + mask |= YT921X_SERDES_TX_PAUSE; + if (tx_pause) + ctrl |= YT921X_SERDES_TX_PAUSE; + mask |= YT921X_SERDES_RX_PAUSE; + if (rx_pause) + ctrl |= YT921X_SERDES_RX_PAUSE; + mask |= YT921X_SERDES_LINK; + ctrl |= YT921X_SERDES_LINK; + res = yt921x_reg_update_bits(priv, YT921X_SERDESn(port), + mask, ctrl); + if (res) + return res; + + mask = YT921X_XMII_LINK; + res = yt921x_reg_set_bits(priv, YT921X_XMIIn(port), mask); + if (res) + return res; + + switch (speed) { + case SPEED_10: + ctrl = YT921X_MDIO_POLLING_SPEED_10; + break; + case SPEED_100: + ctrl = YT921X_MDIO_POLLING_SPEED_100; + break; + case SPEED_1000: + ctrl = YT921X_MDIO_POLLING_SPEED_1000; + break; + case SPEED_2500: + ctrl = YT921X_MDIO_POLLING_SPEED_2500; + break; + case SPEED_10000: + ctrl = YT921X_MDIO_POLLING_SPEED_10000; + break; + default: + return -EINVAL; + } + if (duplex == DUPLEX_FULL) + ctrl |= YT921X_MDIO_POLLING_DUPLEX_FULL; + ctrl |= YT921X_MDIO_POLLING_LINK; + res = yt921x_reg_write(priv, YT921X_MDIO_POLLINGn(port), ctrl); + if (res) + return res; + } + + return 0; +} + +static int +yt921x_port_config(struct yt921x_priv *priv, int port, unsigned int mode, + phy_interface_t interface) +{ + struct device *dev = to_device(priv); + u32 mask; + u32 ctrl; + int res; + + if (!yt921x_port_is_external(port)) { + if (interface != 
PHY_INTERFACE_MODE_INTERNAL) { + dev_err(dev, "Wrong mode %d on port %d\n", + interface, port); + return -EINVAL; + } + return 0; + } + + switch (interface) { + /* SERDES */ + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_100BASEX: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + mask = YT921X_SERDES_CTRL_PORTn(port); + res = yt921x_reg_set_bits(priv, YT921X_SERDES_CTRL, mask); + if (res) + return res; + + mask = YT921X_XMII_CTRL_PORTn(port); + res = yt921x_reg_clear_bits(priv, YT921X_XMII_CTRL, mask); + if (res) + return res; + + mask = YT921X_SERDES_MODE_M; + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + ctrl = YT921X_SERDES_MODE_SGMII; + break; + case PHY_INTERFACE_MODE_100BASEX: + ctrl = YT921X_SERDES_MODE_100BASEX; + break; + case PHY_INTERFACE_MODE_1000BASEX: + ctrl = YT921X_SERDES_MODE_1000BASEX; + break; + case PHY_INTERFACE_MODE_2500BASEX: + ctrl = YT921X_SERDES_MODE_2500BASEX; + break; + default: + return -EINVAL; + } + res = yt921x_reg_update_bits(priv, YT921X_SERDESn(port), + mask, ctrl); + if (res) + return res; + + break; + /* add XMII support here */ + default: + return -EINVAL; + } + + return 0; +} + +static void +yt921x_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct yt921x_priv *priv = to_yt921x_priv(dp->ds); + int port = dp->index; + int res; + + /* No need to sync; the port control block is held until device removal */ + cancel_delayed_work(&priv->ports[port].mib_read); + + mutex_lock(&priv->reg_lock); + res = yt921x_port_down(priv, port); + mutex_unlock(&priv->reg_lock); + + if (res) + dev_err(dp->ds->dev, "Failed to %s port %d: %i\n", "bring down", + port, res); +} + +static void +yt921x_phylink_mac_link_up(struct phylink_config *config, + struct phy_device *phydev, unsigned int mode, + phy_interface_t interface, int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct 
dsa_port *dp = dsa_phylink_to_port(config); + struct yt921x_priv *priv = to_yt921x_priv(dp->ds); + int port = dp->index; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_port_up(priv, port, mode, interface, speed, duplex, + tx_pause, rx_pause); + mutex_unlock(&priv->reg_lock); + + if (res) + dev_err(dp->ds->dev, "Failed to %s port %d: %i\n", "bring up", + port, res); + + schedule_delayed_work(&priv->ports[port].mib_read, 0); +} + +static void +yt921x_phylink_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct yt921x_priv *priv = to_yt921x_priv(dp->ds); + int port = dp->index; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_port_config(priv, port, mode, state->interface); + mutex_unlock(&priv->reg_lock); + + if (res) + dev_err(dp->ds->dev, "Failed to %s port %d: %i\n", "config", + port, res); +} + +static void +yt921x_dsa_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + const struct yt921x_info *info = priv->info; + + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000; + + if (info->internal_mask & BIT(port)) { + /* Port 10 for MCU should probably go here too. But since that + * is not tested yet, turn it down for the moment by letting it + * fall through to the "no such port" case below. + */ + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + } else if (info->external_mask & BIT(port)) { + /* TODO: external ports may support SERDES only, XMII only, or + * SERDES + XMII depending on the chip. However, we can't get + * the accurate config table due to lack of documentation, thus + * we simply declare SERDES + XMII and rely on the correctness + * of devicetree for now. 
+ */ + + /* SERDES */ + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + /* REVSGMII (SGMII in PHY role) should go here, once + * PHY_INTERFACE_MODE_REVSGMII is introduced. + */ + __set_bit(PHY_INTERFACE_MODE_100BASEX, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + config->supported_interfaces); + config->mac_capabilities |= MAC_2500FD; + + /* XMII */ + + /* Not tested. To add support for XMII: + * - Add proper interface modes below + * - Handle them in yt921x_port_config() + */ + } + /* no such port: empty supported_interfaces causes phylink to turn it + * down + */ +} + +static int yt921x_port_setup(struct yt921x_priv *priv, int port) +{ + struct dsa_switch *ds = &priv->ds; + u32 ctrl; + int res; + + res = yt921x_userport_standalone(priv, port); + if (res) + return res; + + if (dsa_is_cpu_port(ds, port)) { + /* Egress of CPU port is supposed to be completely controlled + * via tagging, so set it to one-way isolated (drop all packets + * without tag). + */ + ctrl = ~(u32)0; + res = yt921x_reg_write(priv, YT921X_PORTn_ISOLATION(port), + ctrl); + if (res) + return res; + + /* To simplify FDB "isolation" simulation, we also disable + * learning on the CPU port, and let software identify packets + * destined for the CPU (either trapped or a static FDB entry is + * matched, no matter which bridge that entry is for), which is + * already done by yt921x_userport_standalone(). As a result, + * VLAN-awareness becomes unrelated on the CPU port (set to + * VLAN-unaware by the way). 
+ */ + } + + return 0; +} + +static enum dsa_tag_protocol +yt921x_dsa_get_tag_protocol(struct dsa_switch *ds, int port, + enum dsa_tag_protocol m) +{ + return DSA_TAG_PROTO_YT921X; +} + +static int yt921x_dsa_port_setup(struct dsa_switch *ds, int port) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_port_setup(priv, port); + mutex_unlock(&priv->reg_lock); + + return res; +} + +static int yt921x_edata_wait(struct yt921x_priv *priv, u32 *valp) +{ + u32 val = YT921X_EDATA_DATA_IDLE; + int res; + + res = yt921x_reg_wait(priv, YT921X_EDATA_DATA, + YT921X_EDATA_DATA_STATUS_M, &val); + if (res) + return res; + + *valp = val; + return 0; +} + +static int +yt921x_edata_read_cont(struct yt921x_priv *priv, u8 addr, u8 *valp) +{ + u32 ctrl; + u32 val; + int res; + + ctrl = YT921X_EDATA_CTRL_ADDR(addr) | YT921X_EDATA_CTRL_READ; + res = yt921x_reg_write(priv, YT921X_EDATA_CTRL, ctrl); + if (res) + return res; + res = yt921x_edata_wait(priv, &val); + if (res) + return res; + + *valp = FIELD_GET(YT921X_EDATA_DATA_DATA_M, val); + return 0; +} + +static int yt921x_edata_read(struct yt921x_priv *priv, u8 addr, u8 *valp) +{ + u32 val; + int res; + + res = yt921x_edata_wait(priv, &val); + if (res) + return res; + return yt921x_edata_read_cont(priv, addr, valp); +} + +static int yt921x_chip_detect(struct yt921x_priv *priv) +{ + struct device *dev = to_device(priv); + const struct yt921x_info *info; + u8 extmode; + u32 chipid; + u32 major; + u32 mode; + int res; + + res = yt921x_reg_read(priv, YT921X_CHIP_ID, &chipid); + if (res) + return res; + + major = FIELD_GET(YT921X_CHIP_ID_MAJOR, chipid); + + for (info = yt921x_infos; info->name; info++) + if (info->major == major) + break; + if (!info->name) { + dev_err(dev, "Unexpected chipid 0x%x\n", chipid); + return -ENODEV; + } + + res = yt921x_reg_read(priv, YT921X_CHIP_MODE, &mode); + if (res) + return res; + res = yt921x_edata_read(priv, YT921X_EDATA_EXTMODE, &extmode); + if 
(res) + return res; + + for (; info->name; info++) + if (info->major == major && info->mode == mode && + info->extmode == extmode) + break; + if (!info->name) { + dev_err(dev, + "Unsupported chipid 0x%x with chipmode 0x%x 0x%x\n", + chipid, mode, extmode); + return -ENODEV; + } + + /* Print chipid here since we are interested in the lower 16 bits */ + dev_info(dev, + "Motorcomm %s ethernet switch, chipid: 0x%x, chipmode: 0x%x 0x%x\n", + info->name, chipid, mode, extmode); + + priv->info = info; + return 0; +} + +static int yt921x_chip_reset(struct yt921x_priv *priv) +{ + struct device *dev = to_device(priv); + u16 eth_p_tag; + u32 val; + int res; + + res = yt921x_chip_detect(priv); + if (res) + return res; + + /* Reset */ + res = yt921x_reg_write(priv, YT921X_RST, YT921X_RST_HW); + if (res) + return res; + + /* RST_HW is almost the same as a GPIO hard reset, so we need this delay. */ + fsleep(YT921X_RST_DELAY_US); + + val = 0; + res = yt921x_reg_wait(priv, YT921X_RST, ~0, &val); + if (res) + return res; + + /* Check for tag EtherType; do it after reset in case you messed it up + * before. + */ + res = yt921x_reg_read(priv, YT921X_CPU_TAG_TPID, &val); + if (res) + return res; + eth_p_tag = FIELD_GET(YT921X_CPU_TAG_TPID_TPID_M, val); + if (eth_p_tag != ETH_P_YT921X) { + dev_err(dev, "Tag type 0x%x != 0x%x\n", eth_p_tag, + ETH_P_YT921X); + /* Despite being possible, we choose not to set CPU_TAG_TPID, + * since there is no way it can be different unless you have the + * wrong chip. 
+ */ + return -EINVAL; + } + + return 0; +} + +static int yt921x_chip_setup(struct yt921x_priv *priv) +{ + struct dsa_switch *ds = &priv->ds; + unsigned long cpu_ports_mask; + u64 ctrl64; + u32 ctrl; + int port; + int res; + + /* Enable DSA */ + priv->cpu_ports_mask = dsa_cpu_ports(ds); + + ctrl = YT921X_EXT_CPU_PORT_TAG_EN | YT921X_EXT_CPU_PORT_PORT_EN | + YT921X_EXT_CPU_PORT_PORT(__ffs(priv->cpu_ports_mask)); + res = yt921x_reg_write(priv, YT921X_EXT_CPU_PORT, ctrl); + if (res) + return res; + + /* Enable and clear MIB */ + res = yt921x_reg_set_bits(priv, YT921X_FUNC, YT921X_FUNC_MIB); + if (res) + return res; + + ctrl = YT921X_MIB_CTRL_CLEAN | YT921X_MIB_CTRL_ALL_PORT; + res = yt921x_reg_write(priv, YT921X_MIB_CTRL, ctrl); + if (res) + return res; + + /* Setup software switch */ + ctrl = YT921X_CPU_COPY_TO_EXT_CPU; + res = yt921x_reg_write(priv, YT921X_CPU_COPY, ctrl); + if (res) + return res; + + ctrl = GENMASK(10, 0); + res = yt921x_reg_write(priv, YT921X_FILTER_UNK_UCAST, ctrl); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_FILTER_UNK_MCAST, ctrl); + if (res) + return res; + + /* YT921x does not support native DSA port bridging, so we use port + * isolation to emulate it. However, be especially careful that port + * isolation takes effect _after_ FDB lookups, i.e. if an FDB entry (from + * another bridge) is matched and the destination port (in another + * bridge) is blocked, the packet will be dropped instead of flooding to + * the "bridged" ports, thus we need to trap and handle those packets by + * software. + * + * If there is no more than one bridge, we might be able to drop them + * directly given some conditions are met, but we trap them in all cases + * for now. + */ + ctrl = 0; + for (int i = 0; i < YT921X_PORT_NUM; i++) + ctrl |= YT921X_ACT_UNK_ACTn_TRAP(i); + /* Except for CPU ports, if any packets are sent via CPU ports without + * tag, they should be dropped. 
+ */ + cpu_ports_mask = priv->cpu_ports_mask; + for_each_set_bit(port, &cpu_ports_mask, YT921X_PORT_NUM) { + ctrl &= ~YT921X_ACT_UNK_ACTn_M(port); + ctrl |= YT921X_ACT_UNK_ACTn_DROP(port); + } + res = yt921x_reg_write(priv, YT921X_ACT_UNK_UCAST, ctrl); + if (res) + return res; + res = yt921x_reg_write(priv, YT921X_ACT_UNK_MCAST, ctrl); + if (res) + return res; + + /* Tagged VID 0 should be treated as untagged, which confuses the + * hardware a lot + */ + ctrl64 = YT921X_VLAN_CTRL_LEARN_DIS | YT921X_VLAN_CTRL_PORTS_M; + res = yt921x_reg64_write(priv, YT921X_VLANn_CTRL(0), ctrl64); + if (res) + return res; + + /* Miscellaneous */ + res = yt921x_reg_set_bits(priv, YT921X_SENSOR, YT921X_SENSOR_TEMP); + if (res) + return res; + + return 0; +} + +static int yt921x_dsa_setup(struct dsa_switch *ds) +{ + struct yt921x_priv *priv = to_yt921x_priv(ds); + struct device *dev = to_device(priv); + struct device_node *np = dev->of_node; + struct device_node *child; + int res; + + mutex_lock(&priv->reg_lock); + res = yt921x_chip_reset(priv); + mutex_unlock(&priv->reg_lock); + + if (res) + return res; + + /* Register the internal mdio bus. Nodes for internal ports should have + * proper phy-handle pointing to their PHYs. Not enabling the internal + * bus is possible, though pretty weird, if internal ports are not used. 
+ */ + child = of_get_child_by_name(np, "mdio"); + if (child) { + res = yt921x_mbus_int_init(priv, child); + of_node_put(child); + if (res) + return res; + } + + /* External mdio bus is optional */ + child = of_get_child_by_name(np, "mdio-external"); + if (child) { + res = yt921x_mbus_ext_init(priv, child); + of_node_put(child); + if (res) + return res; + + dev_err(dev, "Untested external mdio bus\n"); + return -ENODEV; + } + + mutex_lock(&priv->reg_lock); + res = yt921x_chip_setup(priv); + mutex_unlock(&priv->reg_lock); + + if (res) + return res; + + return 0; +} + +static const struct phylink_mac_ops yt921x_phylink_mac_ops = { + .mac_link_down = yt921x_phylink_mac_link_down, + .mac_link_up = yt921x_phylink_mac_link_up, + .mac_config = yt921x_phylink_mac_config, +}; + +static const struct dsa_switch_ops yt921x_dsa_switch_ops = { + /* mib */ + .get_strings = yt921x_dsa_get_strings, + .get_ethtool_stats = yt921x_dsa_get_ethtool_stats, + .get_sset_count = yt921x_dsa_get_sset_count, + .get_eth_mac_stats = yt921x_dsa_get_eth_mac_stats, + .get_eth_ctrl_stats = yt921x_dsa_get_eth_ctrl_stats, + .get_rmon_stats = yt921x_dsa_get_rmon_stats, + .get_stats64 = yt921x_dsa_get_stats64, + .get_pause_stats = yt921x_dsa_get_pause_stats, + /* eee */ + .support_eee = dsa_supports_eee, + .set_mac_eee = yt921x_dsa_set_mac_eee, + /* mtu */ + .port_change_mtu = yt921x_dsa_port_change_mtu, + .port_max_mtu = yt921x_dsa_port_max_mtu, + /* mirror */ + .port_mirror_del = yt921x_dsa_port_mirror_del, + .port_mirror_add = yt921x_dsa_port_mirror_add, + /* fdb */ + .port_fdb_dump = yt921x_dsa_port_fdb_dump, + .port_fast_age = yt921x_dsa_port_fast_age, + .set_ageing_time = yt921x_dsa_set_ageing_time, + .port_fdb_del = yt921x_dsa_port_fdb_del, + .port_fdb_add = yt921x_dsa_port_fdb_add, + .port_mdb_del = yt921x_dsa_port_mdb_del, + .port_mdb_add = yt921x_dsa_port_mdb_add, + /* vlan */ + .port_vlan_filtering = yt921x_dsa_port_vlan_filtering, + .port_vlan_del = yt921x_dsa_port_vlan_del, + .port_vlan_add 
= yt921x_dsa_port_vlan_add, + /* bridge */ + .port_pre_bridge_flags = yt921x_dsa_port_pre_bridge_flags, + .port_bridge_flags = yt921x_dsa_port_bridge_flags, + .port_bridge_leave = yt921x_dsa_port_bridge_leave, + .port_bridge_join = yt921x_dsa_port_bridge_join, + /* port */ + .get_tag_protocol = yt921x_dsa_get_tag_protocol, + .phylink_get_caps = yt921x_dsa_phylink_get_caps, + .port_setup = yt921x_dsa_port_setup, + /* chip */ + .setup = yt921x_dsa_setup, +}; + +static void yt921x_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct yt921x_priv *priv = mdiodev_get_drvdata(mdiodev); + + if (!priv) + return; + + dsa_switch_shutdown(&priv->ds); +} + +static void yt921x_mdio_remove(struct mdio_device *mdiodev) +{ + struct yt921x_priv *priv = mdiodev_get_drvdata(mdiodev); + + if (!priv) + return; + + for (size_t i = ARRAY_SIZE(priv->ports); i-- > 0; ) { + struct yt921x_port *pp = &priv->ports[i]; + + disable_delayed_work_sync(&pp->mib_read); + } + + dsa_unregister_switch(&priv->ds); + + mutex_destroy(&priv->reg_lock); +} + +static int yt921x_mdio_probe(struct mdio_device *mdiodev) +{ + struct device *dev = &mdiodev->dev; + struct yt921x_reg_mdio *mdio; + struct yt921x_priv *priv; + struct dsa_switch *ds; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + mdio = devm_kzalloc(dev, sizeof(*mdio), GFP_KERNEL); + if (!mdio) + return -ENOMEM; + + mdio->bus = mdiodev->bus; + mdio->addr = mdiodev->addr; + mdio->switchid = 0; + + mutex_init(&priv->reg_lock); + + priv->reg_ops = &yt921x_reg_ops_mdio; + priv->reg_ctx = mdio; + + for (size_t i = 0; i < ARRAY_SIZE(priv->ports); i++) { + struct yt921x_port *pp = &priv->ports[i]; + + pp->index = i; + INIT_DELAYED_WORK(&pp->mib_read, yt921x_poll_mib); + } + + ds = &priv->ds; + ds->dev = dev; + ds->assisted_learning_on_cpu_port = true; + ds->priv = priv; + ds->ops = &yt921x_dsa_switch_ops; + ds->phylink_mac_ops = &yt921x_phylink_mac_ops; + ds->num_ports = YT921X_PORT_NUM; + + 
mdiodev_set_drvdata(mdiodev, priv); + + return dsa_register_switch(ds); +} + +static const struct of_device_id yt921x_of_match[] = { + { .compatible = "motorcomm,yt9215" }, + {} +}; +MODULE_DEVICE_TABLE(of, yt921x_of_match); + +static struct mdio_driver yt921x_mdio_driver = { + .probe = yt921x_mdio_probe, + .remove = yt921x_mdio_remove, + .shutdown = yt921x_mdio_shutdown, + .mdiodrv.driver = { + .name = YT921X_NAME, + .of_match_table = yt921x_of_match, + }, +}; + +mdio_module_driver(yt921x_mdio_driver); + +MODULE_AUTHOR("David Yang "); +MODULE_DESCRIPTION("Driver for Motorcomm YT921x Switch"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/yt921x.h b/drivers/net/dsa/yt921x.h new file mode 100644 index 0000000000000..3e85d90826fb5 --- /dev/null +++ b/drivers/net/dsa/yt921x.h @@ -0,0 +1,504 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 David Yang + */ + +#ifndef __YT921X_H +#define __YT921X_H + +#include + +#define YT921X_SMI_SWITCHID_M GENMASK(3, 2) +#define YT921X_SMI_SWITCHID(x) FIELD_PREP(YT921X_SMI_SWITCHID_M, (x)) +#define YT921X_SMI_AD BIT(1) +#define YT921X_SMI_ADDR 0 +#define YT921X_SMI_DATA YT921X_SMI_AD +#define YT921X_SMI_RW BIT(0) +#define YT921X_SMI_WRITE 0 +#define YT921X_SMI_READ YT921X_SMI_RW + +#define YT921X_SWITCHID_NUM 4 + +#define YT921X_RST 0x80000 +#define YT921X_RST_HW BIT(31) +#define YT921X_RST_SW BIT(1) +#define YT921X_FUNC 0x80004 +#define YT921X_FUNC_MIB BIT(1) +#define YT921X_CHIP_ID 0x80008 +#define YT921X_CHIP_ID_MAJOR GENMASK(31, 16) +#define YT921X_EXT_CPU_PORT 0x8000c +#define YT921X_EXT_CPU_PORT_TAG_EN BIT(15) +#define YT921X_EXT_CPU_PORT_PORT_EN BIT(14) +#define YT921X_EXT_CPU_PORT_PORT_M GENMASK(3, 0) +#define YT921X_EXT_CPU_PORT_PORT(x) FIELD_PREP(YT921X_EXT_CPU_PORT_PORT_M, (x)) +#define YT921X_CPU_TAG_TPID 0x80010 +#define YT921X_CPU_TAG_TPID_TPID_M GENMASK(15, 0) +/* Same as ETH_P_YT921X, but this represents the true HW default, while the + * former is a local convention chosen by us. 
+ */ +#define YT921X_CPU_TAG_TPID_TPID_DEFAULT 0x9988 +#define YT921X_PVID_SEL 0x80014 +#define YT921X_PVID_SEL_SVID_PORTn(port) BIT(port) +#define YT921X_SERDES_CTRL 0x80028 +#define YT921X_SERDES_CTRL_PORTn_TEST(port) BIT((port) - 3) +#define YT921X_SERDES_CTRL_PORTn(port) BIT((port) - 8) +#define YT921X_IO_LEVEL 0x80030 +#define YT9215_IO_LEVEL_NORMAL_M GENMASK(5, 4) +#define YT9215_IO_LEVEL_NORMAL(x) FIELD_PREP(YT9215_IO_LEVEL_NORMAL_M, (x)) +#define YT9215_IO_LEVEL_NORMAL_3V3 YT9215_IO_LEVEL_NORMAL(0) +#define YT9215_IO_LEVEL_NORMAL_1V8 YT9215_IO_LEVEL_NORMAL(3) +#define YT9215_IO_LEVEL_RGMII1_M GENMASK(3, 2) +#define YT9215_IO_LEVEL_RGMII1(x) FIELD_PREP(YT9215_IO_LEVEL_RGMII1_M, (x)) +#define YT9215_IO_LEVEL_RGMII1_3V3 YT9215_IO_LEVEL_RGMII1(0) +#define YT9215_IO_LEVEL_RGMII1_2V5 YT9215_IO_LEVEL_RGMII1(1) +#define YT9215_IO_LEVEL_RGMII1_1V8 YT9215_IO_LEVEL_RGMII1(2) +#define YT9215_IO_LEVEL_RGMII0_M GENMASK(1, 0) +#define YT9215_IO_LEVEL_RGMII0(x) FIELD_PREP(YT9215_IO_LEVEL_RGMII0_M, (x)) +#define YT9215_IO_LEVEL_RGMII0_3V3 YT9215_IO_LEVEL_RGMII0(0) +#define YT9215_IO_LEVEL_RGMII0_2V5 YT9215_IO_LEVEL_RGMII0(1) +#define YT9215_IO_LEVEL_RGMII0_1V8 YT9215_IO_LEVEL_RGMII0(2) +#define YT9218_IO_LEVEL_RGMII1_M GENMASK(5, 4) +#define YT9218_IO_LEVEL_RGMII1(x) FIELD_PREP(YT9218_IO_LEVEL_RGMII1_M, (x)) +#define YT9218_IO_LEVEL_RGMII1_3V3 YT9218_IO_LEVEL_RGMII1(0) +#define YT9218_IO_LEVEL_RGMII1_2V5 YT9218_IO_LEVEL_RGMII1(1) +#define YT9218_IO_LEVEL_RGMII1_1V8 YT9218_IO_LEVEL_RGMII1(2) +#define YT9218_IO_LEVEL_RGMII0_M GENMASK(3, 2) +#define YT9218_IO_LEVEL_RGMII0(x) FIELD_PREP(YT9218_IO_LEVEL_RGMII0_M, (x)) +#define YT9218_IO_LEVEL_RGMII0_3V3 YT9218_IO_LEVEL_RGMII0(0) +#define YT9218_IO_LEVEL_RGMII0_2V5 YT9218_IO_LEVEL_RGMII0(1) +#define YT9218_IO_LEVEL_RGMII0_1V8 YT9218_IO_LEVEL_RGMII0(2) +#define YT9218_IO_LEVEL_NORMAL_M GENMASK(1, 0) +#define YT9218_IO_LEVEL_NORMAL(x) FIELD_PREP(YT9218_IO_LEVEL_NORMAL_M, (x)) +#define YT9218_IO_LEVEL_NORMAL_3V3 
YT9218_IO_LEVEL_NORMAL(0) +#define YT9218_IO_LEVEL_NORMAL_1V8 YT9218_IO_LEVEL_NORMAL(3) +#define YT921X_MAC_ADDR_HI2 0x80080 +#define YT921X_MAC_ADDR_LO4 0x80084 +#define YT921X_SERDESn(port) (0x8008c + 4 * ((port) - 8)) +#define YT921X_SERDES_MODE_M GENMASK(9, 7) +#define YT921X_SERDES_MODE(x) FIELD_PREP(YT921X_SERDES_MODE_M, (x)) +#define YT921X_SERDES_MODE_SGMII YT921X_SERDES_MODE(0) +#define YT921X_SERDES_MODE_REVSGMII YT921X_SERDES_MODE(1) +#define YT921X_SERDES_MODE_1000BASEX YT921X_SERDES_MODE(2) +#define YT921X_SERDES_MODE_100BASEX YT921X_SERDES_MODE(3) +#define YT921X_SERDES_MODE_2500BASEX YT921X_SERDES_MODE(4) +#define YT921X_SERDES_RX_PAUSE BIT(6) +#define YT921X_SERDES_TX_PAUSE BIT(5) +#define YT921X_SERDES_LINK BIT(4) /* force link */ +#define YT921X_SERDES_DUPLEX_FULL BIT(3) +#define YT921X_SERDES_SPEED_M GENMASK(2, 0) +#define YT921X_SERDES_SPEED(x) FIELD_PREP(YT921X_SERDES_SPEED_M, (x)) +#define YT921X_SERDES_SPEED_10 YT921X_SERDES_SPEED(0) +#define YT921X_SERDES_SPEED_100 YT921X_SERDES_SPEED(1) +#define YT921X_SERDES_SPEED_1000 YT921X_SERDES_SPEED(2) +#define YT921X_SERDES_SPEED_10000 YT921X_SERDES_SPEED(3) +#define YT921X_SERDES_SPEED_2500 YT921X_SERDES_SPEED(4) +#define YT921X_PORTn_CTRL(port) (0x80100 + 4 * (port)) +#define YT921X_PORT_CTRL_PAUSE_AN BIT(10) +#define YT921X_PORTn_STATUS(port) (0x80200 + 4 * (port)) +#define YT921X_PORT_LINK BIT(9) /* CTRL: auto negotiation */ +#define YT921X_PORT_HALF_PAUSE BIT(8) /* Half-duplex back pressure mode */ +#define YT921X_PORT_DUPLEX_FULL BIT(7) +#define YT921X_PORT_RX_PAUSE BIT(6) +#define YT921X_PORT_TX_PAUSE BIT(5) +#define YT921X_PORT_RX_MAC_EN BIT(4) +#define YT921X_PORT_TX_MAC_EN BIT(3) +#define YT921X_PORT_SPEED_M GENMASK(2, 0) +#define YT921X_PORT_SPEED(x) FIELD_PREP(YT921X_PORT_SPEED_M, (x)) +#define YT921X_PORT_SPEED_10 YT921X_PORT_SPEED(0) +#define YT921X_PORT_SPEED_100 YT921X_PORT_SPEED(1) +#define YT921X_PORT_SPEED_1000 YT921X_PORT_SPEED(2) +#define YT921X_PORT_SPEED_10000 
YT921X_PORT_SPEED(3) +#define YT921X_PORT_SPEED_2500 YT921X_PORT_SPEED(4) +#define YT921X_PON_STRAP_FUNC 0x80320 +#define YT921X_PON_STRAP_VAL 0x80324 +#define YT921X_PON_STRAP_CAP 0x80328 +#define YT921X_PON_STRAP_EEE BIT(16) +#define YT921X_PON_STRAP_LOOP_DETECT BIT(7) +#define YT921X_MDIO_POLLINGn(port) (0x80364 + 4 * ((port) - 8)) +#define YT921X_MDIO_POLLING_DUPLEX_FULL BIT(4) +#define YT921X_MDIO_POLLING_LINK BIT(3) +#define YT921X_MDIO_POLLING_SPEED_M GENMASK(2, 0) +#define YT921X_MDIO_POLLING_SPEED(x) FIELD_PREP(YT921X_MDIO_POLLING_SPEED_M, (x)) +#define YT921X_MDIO_POLLING_SPEED_10 YT921X_MDIO_POLLING_SPEED(0) +#define YT921X_MDIO_POLLING_SPEED_100 YT921X_MDIO_POLLING_SPEED(1) +#define YT921X_MDIO_POLLING_SPEED_1000 YT921X_MDIO_POLLING_SPEED(2) +#define YT921X_MDIO_POLLING_SPEED_10000 YT921X_MDIO_POLLING_SPEED(3) +#define YT921X_MDIO_POLLING_SPEED_2500 YT921X_MDIO_POLLING_SPEED(4) +#define YT921X_SENSOR 0x8036c +#define YT921X_SENSOR_TEMP BIT(18) +#define YT921X_TEMP 0x80374 +#define YT921X_CHIP_MODE 0x80388 +#define YT921X_CHIP_MODE_MODE GENMASK(1, 0) +#define YT921X_XMII_CTRL 0x80394 +#define YT921X_XMII_CTRL_PORTn(port) BIT(9 - (port)) /* Yes, it's reversed */ +#define YT921X_XMIIn(port) (0x80400 + 8 * ((port) - 8)) +#define YT921X_XMII_MODE_M GENMASK(31, 29) +#define YT921X_XMII_MODE(x) FIELD_PREP(YT921X_XMII_MODE_M, (x)) +#define YT921X_XMII_MODE_MII YT921X_XMII_MODE(0) +#define YT921X_XMII_MODE_REVMII YT921X_XMII_MODE(1) +#define YT921X_XMII_MODE_RMII YT921X_XMII_MODE(2) +#define YT921X_XMII_MODE_REVRMII YT921X_XMII_MODE(3) +#define YT921X_XMII_MODE_RGMII YT921X_XMII_MODE(4) +#define YT921X_XMII_MODE_DISABLE YT921X_XMII_MODE(5) +#define YT921X_XMII_LINK BIT(19) /* force link */ +#define YT921X_XMII_EN BIT(18) +#define YT921X_XMII_SOFT_RST BIT(17) +#define YT921X_XMII_RGMII_TX_DELAY_150PS_M GENMASK(16, 13) +#define YT921X_XMII_RGMII_TX_DELAY_150PS(x) FIELD_PREP(YT921X_XMII_RGMII_TX_DELAY_150PS_M, (x)) +#define YT921X_XMII_TX_CLK_IN BIT(11) +#define 
YT921X_XMII_RX_CLK_IN BIT(10) +#define YT921X_XMII_RGMII_TX_DELAY_2NS BIT(8) +#define YT921X_XMII_RGMII_TX_CLK_OUT BIT(7) +#define YT921X_XMII_RGMII_RX_DELAY_150PS_M GENMASK(6, 3) +#define YT921X_XMII_RGMII_RX_DELAY_150PS(x) FIELD_PREP(YT921X_XMII_RGMII_RX_DELAY_150PS_M, (x)) +#define YT921X_XMII_RMII_PHY_TX_CLK_OUT BIT(2) +#define YT921X_XMII_REVMII_TX_CLK_OUT BIT(1) +#define YT921X_XMII_REVMII_RX_CLK_OUT BIT(0) + +#define YT921X_MACn_FRAME(port) (0x81008 + 0x1000 * (port)) +#define YT921X_MAC_FRAME_SIZE_M GENMASK(21, 8) +#define YT921X_MAC_FRAME_SIZE(x) FIELD_PREP(YT921X_MAC_FRAME_SIZE_M, (x)) + +#define YT921X_EEEn_VAL(port) (0xa0000 + 0x40 * (port)) +#define YT921X_EEE_VAL_DATA BIT(1) + +#define YT921X_EEE_CTRL 0xb0000 +#define YT921X_EEE_CTRL_ENn(port) BIT(port) + +#define YT921X_MIB_CTRL 0xc0004 +#define YT921X_MIB_CTRL_CLEAN BIT(30) +#define YT921X_MIB_CTRL_PORT_M GENMASK(6, 3) +#define YT921X_MIB_CTRL_PORT(x) FIELD_PREP(YT921X_MIB_CTRL_PORT_M, (x)) +#define YT921X_MIB_CTRL_ONE_PORT BIT(1) +#define YT921X_MIB_CTRL_ALL_PORT BIT(0) +#define YT921X_MIBn_DATA0(port) (0xc0100 + 0x100 * (port)) +#define YT921X_MIBn_DATAm(port, x) (YT921X_MIBn_DATA0(port) + 4 * (x)) + +#define YT921X_EDATA_CTRL 0xe0000 +#define YT921X_EDATA_CTRL_ADDR_M GENMASK(15, 8) +#define YT921X_EDATA_CTRL_ADDR(x) FIELD_PREP(YT921X_EDATA_CTRL_ADDR_M, (x)) +#define YT921X_EDATA_CTRL_OP_M GENMASK(3, 0) +#define YT921X_EDATA_CTRL_OP(x) FIELD_PREP(YT921X_EDATA_CTRL_OP_M, (x)) +#define YT921X_EDATA_CTRL_READ YT921X_EDATA_CTRL_OP(5) +#define YT921X_EDATA_DATA 0xe0004 +#define YT921X_EDATA_DATA_DATA_M GENMASK(31, 24) +#define YT921X_EDATA_DATA_STATUS_M GENMASK(3, 0) +#define YT921X_EDATA_DATA_STATUS(x) FIELD_PREP(YT921X_EDATA_DATA_STATUS_M, (x)) +#define YT921X_EDATA_DATA_IDLE YT921X_EDATA_DATA_STATUS(3) + +#define YT921X_EXT_MBUS_OP 0x6a000 +#define YT921X_INT_MBUS_OP 0xf0000 +#define YT921X_MBUS_OP_START BIT(0) +#define YT921X_EXT_MBUS_CTRL 0x6a004 +#define YT921X_INT_MBUS_CTRL 0xf0004 +#define 
YT921X_MBUS_CTRL_PORT_M GENMASK(25, 21) +#define YT921X_MBUS_CTRL_PORT(x) FIELD_PREP(YT921X_MBUS_CTRL_PORT_M, (x)) +#define YT921X_MBUS_CTRL_REG_M GENMASK(20, 16) +#define YT921X_MBUS_CTRL_REG(x) FIELD_PREP(YT921X_MBUS_CTRL_REG_M, (x)) +#define YT921X_MBUS_CTRL_TYPE_M GENMASK(11, 8) /* wild guess */ +#define YT921X_MBUS_CTRL_TYPE(x) FIELD_PREP(YT921X_MBUS_CTRL_TYPE_M, (x)) +#define YT921X_MBUS_CTRL_TYPE_C22 YT921X_MBUS_CTRL_TYPE(4) +#define YT921X_MBUS_CTRL_OP_M GENMASK(3, 2) /* wild guess */ +#define YT921X_MBUS_CTRL_OP(x) FIELD_PREP(YT921X_MBUS_CTRL_OP_M, (x)) +#define YT921X_MBUS_CTRL_WRITE YT921X_MBUS_CTRL_OP(1) +#define YT921X_MBUS_CTRL_READ YT921X_MBUS_CTRL_OP(2) +#define YT921X_EXT_MBUS_DOUT 0x6a008 +#define YT921X_INT_MBUS_DOUT 0xf0008 +#define YT921X_EXT_MBUS_DIN 0x6a00c +#define YT921X_INT_MBUS_DIN 0xf000c + +#define YT921X_PORTn_EGR(port) (0x100000 + 4 * (port)) +#define YT921X_PORT_EGR_TPID_CTAG_M GENMASK(5, 4) +#define YT921X_PORT_EGR_TPID_CTAG(x) FIELD_PREP(YT921X_PORT_EGR_TPID_CTAG_M, (x)) +#define YT921X_PORT_EGR_TPID_STAG_M GENMASK(3, 2) +#define YT921X_PORT_EGR_TPID_STAG(x) FIELD_PREP(YT921X_PORT_EGR_TPID_STAG_M, (x)) +#define YT921X_TPID_EGRn(x) (0x100300 + 4 * (x)) /* [0, 3] */ +#define YT921X_TPID_EGR_TPID_M GENMASK(15, 0) + +#define YT921X_VLAN_IGR_FILTER 0x180280 +#define YT921X_VLAN_IGR_FILTER_PORTn_BYPASS_IGMP(port) BIT((port) + 11) +#define YT921X_VLAN_IGR_FILTER_PORTn(port) BIT(port) +#define YT921X_PORTn_ISOLATION(port) (0x180294 + 4 * (port)) +#define YT921X_PORT_ISOLATION_BLOCKn(port) BIT(port) +#define YT921X_PORTn_LEARN(port) (0x1803d0 + 4 * (port)) +#define YT921X_PORT_LEARN_VID_LEARN_MULTI_EN BIT(22) +#define YT921X_PORT_LEARN_VID_LEARN_MODE BIT(21) +#define YT921X_PORT_LEARN_VID_LEARN_EN BIT(20) +#define YT921X_PORT_LEARN_SUSPEND_COPY_EN BIT(19) +#define YT921X_PORT_LEARN_SUSPEND_DROP_EN BIT(18) +#define YT921X_PORT_LEARN_DIS BIT(17) +#define YT921X_PORT_LEARN_LIMIT_EN BIT(16) +#define YT921X_PORT_LEARN_LIMIT_M GENMASK(15, 8) 
+#define YT921X_PORT_LEARN_LIMIT(x) FIELD_PREP(YT921X_PORT_LEARN_LIMIT_M, (x)) +#define YT921X_PORT_LEARN_DROP_ON_EXCEEDED BIT(2) +#define YT921X_PORT_LEARN_MODE_M GENMASK(1, 0) +#define YT921X_PORT_LEARN_MODE(x) FIELD_PREP(YT921X_PORT_LEARN_MODE_M, (x)) +#define YT921X_PORT_LEARN_MODE_AUTO YT921X_PORT_LEARN_MODE(0) +#define YT921X_PORT_LEARN_MODE_AUTO_AND_COPY YT921X_PORT_LEARN_MODE(1) +#define YT921X_PORT_LEARN_MODE_CPU_CONTROL YT921X_PORT_LEARN_MODE(2) +#define YT921X_AGEING 0x180440 +#define YT921X_AGEING_INTERVAL_M GENMASK(15, 0) +#define YT921X_FDB_IN0 0x180454 +#define YT921X_FDB_IN1 0x180458 +#define YT921X_FDB_IN2 0x18045c +#define YT921X_FDB_OP 0x180460 +#define YT921X_FDB_OP_INDEX_M GENMASK(22, 11) +#define YT921X_FDB_OP_INDEX(x) FIELD_PREP(YT921X_FDB_OP_INDEX_M, (x)) +#define YT921X_FDB_OP_MODE_INDEX BIT(10) /* mac+fid / index */ +#define YT921X_FDB_OP_FLUSH_MCAST BIT(9) /* ucast / mcast */ +#define YT921X_FDB_OP_FLUSH_M GENMASK(8, 7) +#define YT921X_FDB_OP_FLUSH(x) FIELD_PREP(YT921X_FDB_OP_FLUSH_M, (x)) +#define YT921X_FDB_OP_FLUSH_ALL YT921X_FDB_OP_FLUSH(0) +#define YT921X_FDB_OP_FLUSH_PORT YT921X_FDB_OP_FLUSH(1) +#define YT921X_FDB_OP_FLUSH_PORT_VID YT921X_FDB_OP_FLUSH(2) +#define YT921X_FDB_OP_FLUSH_VID YT921X_FDB_OP_FLUSH(3) +#define YT921X_FDB_OP_FLUSH_STATIC BIT(6) +#define YT921X_FDB_OP_NEXT_TYPE_M GENMASK(5, 4) +#define YT921X_FDB_OP_NEXT_TYPE(x) FIELD_PREP(YT921X_FDB_OP_NEXT_TYPE_M, (x)) +#define YT921X_FDB_OP_NEXT_TYPE_UCAST_PORT YT921X_FDB_OP_NEXT_TYPE(0) +#define YT921X_FDB_OP_NEXT_TYPE_UCAST_VID YT921X_FDB_OP_NEXT_TYPE(1) +#define YT921X_FDB_OP_NEXT_TYPE_UCAST YT921X_FDB_OP_NEXT_TYPE(2) +#define YT921X_FDB_OP_NEXT_TYPE_MCAST YT921X_FDB_OP_NEXT_TYPE(3) +#define YT921X_FDB_OP_OP_M GENMASK(3, 1) +#define YT921X_FDB_OP_OP(x) FIELD_PREP(YT921X_FDB_OP_OP_M, (x)) +#define YT921X_FDB_OP_OP_ADD YT921X_FDB_OP_OP(0) +#define YT921X_FDB_OP_OP_DEL YT921X_FDB_OP_OP(1) +#define YT921X_FDB_OP_OP_GET_ONE YT921X_FDB_OP_OP(2) +#define 
YT921X_FDB_OP_OP_GET_NEXT YT921X_FDB_OP_OP(3) +#define YT921X_FDB_OP_OP_FLUSH YT921X_FDB_OP_OP(4) +#define YT921X_FDB_OP_START BIT(0) +#define YT921X_FDB_RESULT 0x180464 +#define YT921X_FDB_RESULT_DONE BIT(15) +#define YT921X_FDB_RESULT_NOTFOUND BIT(14) +#define YT921X_FDB_RESULT_OVERWRITED BIT(13) +#define YT921X_FDB_RESULT_INDEX_M GENMASK(11, 0) +#define YT921X_FDB_RESULT_INDEX(x) FIELD_PREP(YT921X_FDB_RESULT_INDEX_M, (x)) +#define YT921X_FDB_OUT0 0x1804b0 +#define YT921X_FDB_IO0_ADDR_HI4_M GENMASK(31, 0) +#define YT921X_FDB_OUT1 0x1804b4 +#define YT921X_FDB_IO1_EGR_INT_PRI_EN BIT(31) +#define YT921X_FDB_IO1_STATUS_M GENMASK(30, 28) +#define YT921X_FDB_IO1_STATUS(x) FIELD_PREP(YT921X_FDB_IO1_STATUS_M, (x)) +#define YT921X_FDB_IO1_STATUS_INVALID YT921X_FDB_IO1_STATUS(0) +#define YT921X_FDB_IO1_STATUS_MIN_TIME YT921X_FDB_IO1_STATUS(1) +#define YT921X_FDB_IO1_STATUS_MOVE_AGING_MAX_TIME YT921X_FDB_IO1_STATUS(3) +#define YT921X_FDB_IO1_STATUS_MAX_TIME YT921X_FDB_IO1_STATUS(5) +#define YT921X_FDB_IO1_STATUS_PENDING YT921X_FDB_IO1_STATUS(6) +#define YT921X_FDB_IO1_STATUS_STATIC YT921X_FDB_IO1_STATUS(7) +#define YT921X_FDB_IO1_FID_M GENMASK(27, 16) /* filtering ID (VID) */ +#define YT921X_FDB_IO1_FID(x) FIELD_PREP(YT921X_FDB_IO1_FID_M, (x)) +#define YT921X_FDB_IO1_ADDR_LO2_M GENMASK(15, 0) +#define YT921X_FDB_OUT2 0x1804b8 +#define YT921X_FDB_IO2_MOVE_AGING_STATUS_M GENMASK(31, 30) +#define YT921X_FDB_IO2_IGR_DROP BIT(29) +#define YT921X_FDB_IO2_EGR_PORTS_M GENMASK(28, 18) +#define YT921X_FDB_IO2_EGR_PORTS(x) FIELD_PREP(YT921X_FDB_IO2_EGR_PORTS_M, (x)) +#define YT921X_FDB_IO2_EGR_DROP BIT(17) +#define YT921X_FDB_IO2_COPY_TO_CPU BIT(16) +#define YT921X_FDB_IO2_IGR_INT_PRI_EN BIT(15) +#define YT921X_FDB_IO2_INT_PRI_M GENMASK(14, 12) +#define YT921X_FDB_IO2_INT_PRI(x) FIELD_PREP(YT921X_FDB_IO2_INT_PRI_M, (x)) +#define YT921X_FDB_IO2_NEW_VID_M GENMASK(11, 0) +#define YT921X_FDB_IO2_NEW_VID(x) FIELD_PREP(YT921X_FDB_IO2_NEW_VID_M, (x)) +#define YT921X_FILTER_UNK_UCAST 0x180508 
+#define YT921X_FILTER_UNK_MCAST 0x18050c +#define YT921X_FILTER_MCAST 0x180510 +#define YT921X_FILTER_BCAST 0x180514 +#define YT921X_FILTER_PORTS_M GENMASK(10, 0) +#define YT921X_FILTER_PORTS(x) FIELD_PREP(YT921X_FILTER_PORTS_M, (x)) +#define YT921X_FILTER_PORTn(port) BIT(port) +#define YT921X_VLAN_EGR_FILTER 0x180598 +#define YT921X_VLAN_EGR_FILTER_PORTn(port) BIT(port) +#define YT921X_CPU_COPY 0x180690 +#define YT921X_CPU_COPY_FORCE_INT_PORT BIT(2) +#define YT921X_CPU_COPY_TO_INT_CPU BIT(1) +#define YT921X_CPU_COPY_TO_EXT_CPU BIT(0) +#define YT921X_ACT_UNK_UCAST 0x180734 +#define YT921X_ACT_UNK_MCAST 0x180738 +#define YT921X_ACT_UNK_MCAST_BYPASS_DROP_RMA BIT(23) +#define YT921X_ACT_UNK_MCAST_BYPASS_DROP_IGMP BIT(22) +#define YT921X_ACT_UNK_ACTn_M(port) GENMASK(2 * (port) + 1, 2 * (port)) +#define YT921X_ACT_UNK_ACTn(port, x) ((x) << (2 * (port))) +#define YT921X_ACT_UNK_ACTn_FORWARD(port) YT921X_ACT_UNK_ACTn(port, 0) /* flood */ +#define YT921X_ACT_UNK_ACTn_TRAP(port) YT921X_ACT_UNK_ACTn(port, 1) /* steer to CPU */ +#define YT921X_ACT_UNK_ACTn_DROP(port) YT921X_ACT_UNK_ACTn(port, 2) /* discard */ +/* NEVER use this action; see comments in the tag driver */ +#define YT921X_ACT_UNK_ACTn_COPY(port) YT921X_ACT_UNK_ACTn(port, 3) /* flood and copy */ +#define YT921X_FDB_HW_FLUSH 0x180958 +#define YT921X_FDB_HW_FLUSH_ON_LINKDOWN BIT(0) + +#define YT921X_VLANn_CTRL(vlan) (0x188000 + 8 * (vlan)) /* 64-bit entry: fields below use *_ULL so they are valid on 32-bit builds */ +#define YT921X_VLAN_CTRL_UNTAG_PORTS_M GENMASK_ULL(50, 40) +#define YT921X_VLAN_CTRL_UNTAG_PORTS(x) FIELD_PREP(YT921X_VLAN_CTRL_UNTAG_PORTS_M, (x)) +#define YT921X_VLAN_CTRL_UNTAG_PORTn(port) BIT_ULL((port) + 40) +#define YT921X_VLAN_CTRL_STP_ID_M GENMASK_ULL(39, 36) +#define YT921X_VLAN_CTRL_STP_ID(x) FIELD_PREP(YT921X_VLAN_CTRL_STP_ID_M, (x)) +#define YT921X_VLAN_CTRL_SVLAN_EN BIT_ULL(35) +#define YT921X_VLAN_CTRL_FID_M GENMASK_ULL(34, 23) +#define YT921X_VLAN_CTRL_FID(x) FIELD_PREP(YT921X_VLAN_CTRL_FID_M, (x)) +#define YT921X_VLAN_CTRL_LEARN_DIS BIT_ULL(22) +#define YT921X_VLAN_CTRL_INT_PRI_EN
BIT_ULL(21) +#define YT921X_VLAN_CTRL_INT_PRI_M GENMASK_ULL(20, 18) +#define YT921X_VLAN_CTRL_PORTS_M GENMASK_ULL(17, 7) +#define YT921X_VLAN_CTRL_PORTS(x) FIELD_PREP(YT921X_VLAN_CTRL_PORTS_M, (x)) +#define YT921X_VLAN_CTRL_PORTn(port) BIT_ULL((port) + 7) +#define YT921X_VLAN_CTRL_BYPASS_1X_AC BIT_ULL(6) +#define YT921X_VLAN_CTRL_METER_EN BIT_ULL(5) +#define YT921X_VLAN_CTRL_METER_ID_M GENMASK_ULL(4, 0) + +#define YT921X_TPID_IGRn(x) (0x210000 + 4 * (x)) /* [0, 3] */ +#define YT921X_TPID_IGR_TPID_M GENMASK(15, 0) +#define YT921X_PORTn_IGR_TPID(port) (0x210010 + 4 * (port)) +#define YT921X_PORT_IGR_TPIDn_STAG_M GENMASK(7, 4) +#define YT921X_PORT_IGR_TPIDn_STAG(x) BIT((x) + 4) +#define YT921X_PORT_IGR_TPIDn_CTAG_M GENMASK(3, 0) +#define YT921X_PORT_IGR_TPIDn_CTAG(x) BIT(x) + +#define YT921X_PORTn_VLAN_CTRL(port) (0x230010 + 4 * (port)) +#define YT921X_PORT_VLAN_CTRL_SVLAN_PRI_EN BIT(31) +#define YT921X_PORT_VLAN_CTRL_CVLAN_PRI_EN BIT(30) +#define YT921X_PORT_VLAN_CTRL_SVID_M GENMASK(29, 18) +#define YT921X_PORT_VLAN_CTRL_SVID(x) FIELD_PREP(YT921X_PORT_VLAN_CTRL_SVID_M, (x)) +#define YT921X_PORT_VLAN_CTRL_CVID_M GENMASK(17, 6) +#define YT921X_PORT_VLAN_CTRL_CVID(x) FIELD_PREP(YT921X_PORT_VLAN_CTRL_CVID_M, (x)) +#define YT921X_PORT_VLAN_CTRL_SVLAN_PRI_M GENMASK(5, 3) +#define YT921X_PORT_VLAN_CTRL_CVLAN_PRI_M GENMASK(2, 0) +#define YT921X_PORTn_VLAN_CTRL1(port) (0x230080 + 4 * (port)) +#define YT921X_PORT_VLAN_CTRL1_VLAN_RANGE_EN BIT(8) +#define YT921X_PORT_VLAN_CTRL1_VLAN_RANGE_PROFILE_ID_M GENMASK(7, 4) +#define YT921X_PORT_VLAN_CTRL1_SVLAN_DROP_TAGGED BIT(3) +#define YT921X_PORT_VLAN_CTRL1_SVLAN_DROP_UNTAGGED BIT(2) +#define YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_TAGGED BIT(1) +#define YT921X_PORT_VLAN_CTRL1_CVLAN_DROP_UNTAGGED BIT(0) + +#define YT921X_MIRROR 0x300300 +#define YT921X_MIRROR_IGR_PORTS_M GENMASK(26, 16) +#define YT921X_MIRROR_IGR_PORTS(x) FIELD_PREP(YT921X_MIRROR_IGR_PORTS_M, (x)) +#define YT921X_MIRROR_IGR_PORTn(port) BIT((port) + 16) +#define YT921X_MIRROR_EGR_PORTS_M GENMASK(14,
4) +#define YT921X_MIRROR_EGR_PORTS(x) FIELD_PREP(YT921X_MIRROR_EGR_PORTS_M, (x)) +#define YT921X_MIRROR_EGR_PORTn(port) BIT((port) + 4) +#define YT921X_MIRROR_PORT_M GENMASK(3, 0) +#define YT921X_MIRROR_PORT(x) FIELD_PREP(YT921X_MIRROR_PORT_M, (x)) + +#define YT921X_EDATA_EXTMODE 0xfb +#define YT921X_EDATA_LEN 0x100 + +#define YT921X_FDB_NUM 4096 + +enum yt921x_fdb_entry_status { + YT921X_FDB_ENTRY_STATUS_INVALID = 0, + YT921X_FDB_ENTRY_STATUS_MIN_TIME = 1, + YT921X_FDB_ENTRY_STATUS_MOVE_AGING_MAX_TIME = 3, + YT921X_FDB_ENTRY_STATUS_MAX_TIME = 5, + YT921X_FDB_ENTRY_STATUS_PENDING = 6, + YT921X_FDB_ENTRY_STATUS_STATIC = 7, +}; + +#define YT9215_MAJOR 0x9002 +#define YT9218_MAJOR 0x9001 + +/* required for a hard reset */ +#define YT921X_RST_DELAY_US 10000 + +#define YT921X_FRAME_SIZE_MAX 0x2400 /* 9216 */ + +#define YT921X_TAG_LEN 8 + +/* 8 internal + 2 external + 1 mcu */ +#define YT921X_PORT_NUM 11 + +#define yt921x_port_is_internal(port) ((port) < 8) +#define yt921x_port_is_external(port) (8 <= (port) && (port) < 9) + +struct yt921x_mib { + u64 rx_broadcast; + u64 rx_pause; + u64 rx_multicast; + u64 rx_crc_errors; + + u64 rx_alignment_errors; + u64 rx_undersize_errors; + u64 rx_fragment_errors; + u64 rx_64byte; + + u64 rx_65_127byte; + u64 rx_128_255byte; + u64 rx_256_511byte; + u64 rx_512_1023byte; + + u64 rx_1024_1518byte; + u64 rx_jumbo; + u64 rx_good_bytes; + + u64 rx_bad_bytes; + u64 rx_oversize_errors; + + u64 rx_dropped; + u64 tx_broadcast; + u64 tx_pause; + u64 tx_multicast; + + u64 tx_undersize_errors; + u64 tx_64byte; + u64 tx_65_127byte; + u64 tx_128_255byte; + + u64 tx_256_511byte; + u64 tx_512_1023byte; + u64 tx_1024_1518byte; + u64 tx_jumbo; + + u64 tx_good_bytes; + u64 tx_collisions; + + u64 tx_aborted_errors; + u64 tx_multiple_collisions; + u64 tx_single_collisions; + u64 tx_good; + + u64 tx_deferred; + u64 tx_late_collisions; + u64 rx_oam; + u64 tx_oam; +}; + +struct yt921x_port { + unsigned char index; + + bool hairpin; + bool isolated; + + 
struct delayed_work mib_read; + struct yt921x_mib mib; + u64 rx_frames; + u64 tx_frames; +}; + +struct yt921x_reg_ops { + int (*read)(void *context, u32 reg, u32 *valp); + int (*write)(void *context, u32 reg, u32 val); +}; + +struct yt921x_priv { + struct dsa_switch ds; + + const struct yt921x_info *info; + /* cache of dsa_cpu_ports(ds) */ + u16 cpu_ports_mask; + + /* protect the access to the switch registers */ + struct mutex reg_lock; + const struct yt921x_reg_ops *reg_ops; + void *reg_ctx; + + /* mdio master bus */ + struct mii_bus *mbus_int; + struct mii_bus *mbus_ext; + + struct yt921x_port ports[YT921X_PORT_NUM]; + + u16 eee_ports_mask; +}; + +#endif From 0c5480ac96a4b7079f6cf6877561d7f172c5c8e5 Mon Sep 17 00:00:00 2001 From: David Yang Date: Fri, 17 Oct 2025 14:08:56 +0800 Subject: [PATCH 184/867] MAINTAINERS: add entry for Motorcomm YT921x ethernet switch driver Add a MAINTAINERS entry for the Motorcomm YT921x ethernet switch driver and its DT binding. Signed-off-by: David Yang Link: https://patch.msgid.link/20251017060859.326450-5-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4faa7719bf865..ea72b3bd22485 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17435,6 +17435,14 @@ S: Maintained F: Documentation/devicetree/bindings/net/motorcomm,yt8xxx.yaml F: drivers/net/phy/motorcomm.c +MOTORCOMM YT921X ETHERNET SWITCH DRIVER +M: David Yang +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/dsa/motorcomm,yt921x.yaml +F: drivers/net/dsa/yt921x.* +F: net/dsa/tag_yt921x.c + MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD M: Jiri Slaby S: Maintained From 1471a274b76d1469a06e32752d49b25cb6db2406 Mon Sep 17 00:00:00 2001 From: Shi Hao Date: Sat, 18 Oct 2025 10:55:41 +0530 Subject: [PATCH 185/867] eth: 3c515: replace cleanup_module with __exit update old legacy cleanup_module from the file with __exit module as per kernel code practices 
and restore the #ifdef MODULE condition to allow successful compilation as a built-in driver. The file still had an old cleanup_module in use, which could be updated to an __exit module function; although its init_module is indeed newer, the cleanup_module was still using the older form of exit. To set a proper exit module function, replace cleanup_module with __exit corkscrew_exit_module to align it with kernel code conventions. Signed-off-by: Shi Hao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251018052541.124365-1-i.shihao.999@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c515.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index ecdea58e6a21f..2227c83a48622 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1547,9 +1547,8 @@ static const struct ethtool_ops netdev_ethtool_ops = { .set_msglevel = netdev_set_msglevel, }; - #ifdef MODULE -void cleanup_module(void) +static void __exit corkscrew_exit_module(void) { while (!list_empty(&root_corkscrew_dev)) { struct net_device *dev; @@ -1563,4 +1562,5 @@ void cleanup_module(void) free_netdev(dev); } } +module_exit(corkscrew_exit_module); #endif /* MODULE */ From 4a107a0e836177359b861e9849a2a24ad35e9919 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 18 Oct 2025 20:48:07 +0200 Subject: [PATCH 186/867] net: stmmac: mdio: use phy_find_first to simplify stmmac_mdio_register Simplify the code by using phy_find_first().
Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20ca4962-9588-40b8-b021-fb349a92e9e5@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 53 ++++++++----------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index d62b2870899d5..65db43e9c85e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -584,7 +584,8 @@ int stmmac_mdio_register(struct net_device *ndev) struct device *dev = ndev->dev.parent; struct fwnode_handle *fixed_node; struct fwnode_handle *fwnode; - int addr, found, max_addr; + struct phy_device *phydev; + int max_addr; if (!mdio_bus_data) return 0; @@ -668,41 +669,31 @@ int stmmac_mdio_register(struct net_device *ndev) if (priv->plat->phy_node || mdio_node) goto bus_register_done; - found = 0; - for (addr = 0; addr < max_addr; addr++) { - struct phy_device *phydev = mdiobus_get_phy(new_bus, addr); - - if (!phydev) - continue; - - /* - * If an IRQ was provided to be assigned after - * the bus probe, do it here. - */ - if (!mdio_bus_data->irqs && - (mdio_bus_data->probed_phy_irq > 0)) { - new_bus->irq[addr] = mdio_bus_data->probed_phy_irq; - phydev->irq = mdio_bus_data->probed_phy_irq; - } - - /* - * If we're going to bind the MAC to this PHY bus, - * and no PHY number was provided to the MAC, - * use the one probed here. - */ - if (priv->plat->phy_addr == -1) - priv->plat->phy_addr = addr; - - phy_attached_info(phydev); - found = 1; - } - - if (!found && !mdio_node) { + phydev = phy_find_first(new_bus); + if (!phydev || phydev->mdio.addr >= max_addr) { dev_warn(dev, "No PHY found\n"); err = -ENODEV; goto no_phy_found; } + /* + * If an IRQ was provided to be assigned after + * the bus probe, do it here.
+ */ + if (!mdio_bus_data->irqs && mdio_bus_data->probed_phy_irq > 0) { + new_bus->irq[phydev->mdio.addr] = mdio_bus_data->probed_phy_irq; + phydev->irq = mdio_bus_data->probed_phy_irq; + } + + /* + * If we're going to bind the MAC to this PHY bus, and no PHY number + * was provided to the MAC, use the one probed here. + */ + if (priv->plat->phy_addr == -1) + priv->plat->phy_addr = phydev->mdio.addr; + + phy_attached_info(phydev); + bus_register_done: priv->mii = new_bus; From 91f76771dba0e5ec9ef74e55eb50f2855ce583a3 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sun, 19 Oct 2025 22:57:20 +0000 Subject: [PATCH 187/867] bnxt_en: support PPS in/out on all pins Add supported_extts_flags and supported_perout_flags configuration to make the driver compliant with the latest API. Initialize channel information to 0 to avoid confusing users, because HW doesn't actually care about channels. Signed-off-by: Vadim Fedorenko Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20251019225720.898550-1-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index db81cf6d5289b..1425a75de9a11 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -952,7 +952,6 @@ static int bnxt_ptp_pps_init(struct bnxt *bp) snprintf(ptp_info->pin_config[i].name, sizeof(ptp_info->pin_config[i].name), "bnxt_pps%d", i); ptp_info->pin_config[i].index = i; - ptp_info->pin_config[i].chan = i; if (*pin_usg == BNXT_PPS_PIN_PPS_IN) ptp_info->pin_config[i].func = PTP_PF_EXTTS; else if (*pin_usg == BNXT_PPS_PIN_PPS_OUT) @@ -969,6 +968,8 @@ static int bnxt_ptp_pps_init(struct bnxt *bp) ptp_info->n_per_out = 1; ptp_info->pps = 1; ptp_info->verify = bnxt_ptp_verify; + ptp_info->supported_extts_flags = PTP_RISING_EDGE |
PTP_STRICT_FLAGS; + ptp_info->supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; return 0; } From 962ac5ca99a5c3e7469215bf47572440402dfd59 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 20 Oct 2025 09:44:41 +0800 Subject: [PATCH 188/867] net: macb: Remove duplicate linux/inetdevice.h header ./drivers/net/ethernet/cadence/macb_main.c: linux/inetdevice.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=26474 Signed-off-by: Jiapeng Chong Acked-by: Nicolas Ferre Link: https://patch.msgid.link/20251020014441.2070356-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 214f543af3b8f..39673f5c3337f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include From 54be197109762d2a3c2b23d44e960ce7a43a3798 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 20 Oct 2025 17:37:59 +0200 Subject: [PATCH 189/867] wifi: ath10k: Support for FTM TLV test commands Existing tools like myftm use 'legacy' test command API. Similarly to ath11k and ath12k, we want to support raw TLV payload submitted from the test tool. This requires segmenting the TLV payload and encapsulating it within a WMI command. The opposite operation needs to be done upon corresponding event receiving. 
Tested-on: WCN3990 hw1.0 WLAN.HL.3.3.7.c2-00931-QCAHLSWMTPLZ-1 Signed-off-by: Loic Poulain Link: https://patch.msgid.link/20251020153759.407516-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/core.h | 4 + drivers/net/wireless/ath/ath10k/testmode.c | 253 ++++++++++++++++--- drivers/net/wireless/ath/ath10k/testmode_i.h | 15 ++ drivers/net/wireless/ath/ath10k/wmi.h | 19 +- 4 files changed, 260 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 859176fcb5a29..73a9db302245d 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -1259,9 +1259,13 @@ struct ath10k { struct { /* protected by conf_mutex */ struct ath10k_fw_components utf_mode_fw; + u8 ftm_msgref; /* protected by data_lock */ bool utf_monitor; + u32 data_pos; + u32 expected_seq; + u8 *eventdata; } testmode; struct { diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 3fcefc55b74f2..d3bd385694d6e 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: ISC /* * Copyright (c) 2014-2017 Qualcomm Atheros, Inc. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include "testmode.h" @@ -10,12 +11,17 @@ #include "debug.h" #include "wmi.h" +#include "wmi-tlv.h" #include "hif.h" #include "hw.h" #include "core.h" #include "testmode_i.h" +#define ATH10K_FTM_SEG_NONE ((u32)-1) +#define ATH10K_FTM_SEGHDR_CURRENT_SEQ GENMASK(3, 0) +#define ATH10K_FTM_SEGHDR_TOTAL_SEGMENTS GENMASK(7, 4) + static const struct nla_policy ath10k_tm_policy[ATH10K_TM_ATTR_MAX + 1] = { [ATH10K_TM_ATTR_CMD] = { .type = NLA_U32 }, [ATH10K_TM_ATTR_DATA] = { .type = NLA_BINARY, @@ -25,41 +31,19 @@ static const struct nla_policy ath10k_tm_policy[ATH10K_TM_ATTR_MAX + 1] = { [ATH10K_TM_ATTR_VERSION_MINOR] = { .type = NLA_U32 }, }; -/* Returns true if callee consumes the skb and the skb should be discarded. - * Returns false if skb is not used. Does not sleep. - */ -bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) +static void ath10k_tm_event_unsegmented(struct ath10k *ar, u32 cmd_id, + struct sk_buff *skb) { struct sk_buff *nl_skb; - bool consumed; int ret; - ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode event wmi cmd_id %d skb %p skb->len %d\n", - cmd_id, skb, skb->len); - - ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len); - - spin_lock_bh(&ar->data_lock); - - if (!ar->testmode.utf_monitor) { - consumed = false; - goto out; - } - - /* Only testmode.c should be handling events from utf firmware, - * otherwise all sort of problems will arise as mac80211 operations - * are not initialised. 
- */ - consumed = true; - nl_skb = cfg80211_testmode_alloc_event_skb(ar->hw->wiphy, 2 * sizeof(u32) + skb->len, GFP_ATOMIC); if (!nl_skb) { ath10k_warn(ar, "failed to allocate skb for testmode wmi event\n"); - goto out; + return; } ret = nla_put_u32(nl_skb, ATH10K_TM_ATTR_CMD, ATH10K_TM_CMD_WMI); @@ -68,7 +52,7 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) "failed to put testmode wmi event cmd attribute: %d\n", ret); kfree_skb(nl_skb); - goto out; + return; } ret = nla_put_u32(nl_skb, ATH10K_TM_ATTR_WMI_CMDID, cmd_id); @@ -77,7 +61,7 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) "failed to put testmode wmi event cmd_id: %d\n", ret); kfree_skb(nl_skb); - goto out; + return; } ret = nla_put(nl_skb, ATH10K_TM_ATTR_DATA, skb->len, skb->data); @@ -86,10 +70,122 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) "failed to copy skb to testmode wmi event: %d\n", ret); kfree_skb(nl_skb); - goto out; + return; + } + + cfg80211_testmode_event(nl_skb, GFP_ATOMIC); +} + +static void ath10k_tm_event_segmented(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) +{ + struct wmi_ftm_cmd *ftm = (struct wmi_ftm_cmd *)skb->data; + u8 total_segments, current_seq; + struct sk_buff *nl_skb; + u8 const *buf_pos; + u16 datalen; + u32 data_pos; + int ret; + + if (skb->len < sizeof(*ftm)) { + ath10k_warn(ar, "Invalid ftm event length: %d\n", skb->len); + return; + } + + current_seq = FIELD_GET(ATH10K_FTM_SEGHDR_CURRENT_SEQ, + __le32_to_cpu(ftm->seg_hdr.segmentinfo)); + total_segments = FIELD_GET(ATH10K_FTM_SEGHDR_TOTAL_SEGMENTS, + __le32_to_cpu(ftm->seg_hdr.segmentinfo)); + datalen = skb->len - sizeof(*ftm); + buf_pos = ftm->data; + + if (current_seq == 0) { + ar->testmode.expected_seq = 0; + ar->testmode.data_pos = 0; + } + + data_pos = ar->testmode.data_pos; + + if ((data_pos + datalen) > ATH_FTM_EVENT_MAX_BUF_LENGTH) { + ath10k_warn(ar, "Invalid ftm event length at %u: %u\n", + data_pos, 
datalen); + ret = -EINVAL; + return; + } + + memcpy(&ar->testmode.eventdata[data_pos], buf_pos, datalen); + data_pos += datalen; + + if (++ar->testmode.expected_seq != total_segments) { + ar->testmode.data_pos = data_pos; + ath10k_dbg(ar, ATH10K_DBG_TESTMODE, "partial data received %u/%u\n", + current_seq + 1, total_segments); + return; + } + + ath10k_dbg(ar, ATH10K_DBG_TESTMODE, "total data length %u\n", data_pos); + + nl_skb = cfg80211_testmode_alloc_event_skb(ar->hw->wiphy, + 2 * sizeof(u32) + data_pos, + GFP_ATOMIC); + if (!nl_skb) { + ath10k_warn(ar, "failed to allocate skb for testmode wmi event\n"); + return; + } + + ret = nla_put_u32(nl_skb, ATH10K_TM_ATTR_CMD, ATH10K_TM_CMD_TLV); + if (ret) { + ath10k_warn(ar, "failed to put testmode wmi event attribute: %d\n", ret); + kfree_skb(nl_skb); + return; + } + + ret = nla_put_u32(nl_skb, ATH10K_TM_ATTR_WMI_CMDID, cmd_id); + if (ret) { + ath10k_warn(ar, "failed to put testmode wmi event cmd_id: %d\n", ret); + kfree_skb(nl_skb); + return; + } + + ret = nla_put(nl_skb, ATH10K_TM_ATTR_DATA, data_pos, &ar->testmode.eventdata[0]); + if (ret) { + ath10k_warn(ar, "failed to copy skb to testmode wmi event: %d\n", ret); + kfree_skb(nl_skb); + return; } cfg80211_testmode_event(nl_skb, GFP_ATOMIC); +} + +/* Returns true if callee consumes the skb and the skb should be discarded. + * Returns false if skb is not used. Does not sleep. + */ +bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) +{ + bool consumed; + + ath10k_dbg(ar, ATH10K_DBG_TESTMODE, + "testmode event wmi cmd_id %d skb %p skb->len %d\n", + cmd_id, skb, skb->len); + + ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len); + + spin_lock_bh(&ar->data_lock); + + if (!ar->testmode.utf_monitor) { + consumed = false; + goto out; + } + + /* Only testmode.c should be handling events from utf firmware, + * otherwise all sort of problems will arise as mac80211 operations + * are not initialised. 
+ */ + consumed = true; + + if (ar->testmode.expected_seq != ATH10K_FTM_SEG_NONE) + ath10k_tm_event_segmented(ar, cmd_id, skb); + else + ath10k_tm_event_unsegmented(ar, cmd_id, skb); out: spin_unlock_bh(&ar->data_lock); @@ -281,12 +377,18 @@ static int ath10k_tm_cmd_utf_start(struct ath10k *ar, struct nlattr *tb[]) goto err_release_utf_mode_fw; } + ar->testmode.eventdata = kzalloc(ATH_FTM_EVENT_MAX_BUF_LENGTH, GFP_KERNEL); + if (!ar->testmode.eventdata) { + ret = -ENOMEM; + goto err_power_down; + } + ret = ath10k_core_start(ar, ATH10K_FIRMWARE_MODE_UTF, &ar->testmode.utf_mode_fw); if (ret) { ath10k_err(ar, "failed to start core (testmode): %d\n", ret); ar->state = ATH10K_STATE_OFF; - goto err_power_down; + goto err_release_eventdata; } ar->state = ATH10K_STATE_UTF; @@ -302,6 +404,10 @@ static int ath10k_tm_cmd_utf_start(struct ath10k *ar, struct nlattr *tb[]) return 0; +err_release_eventdata: + kfree(ar->testmode.eventdata); + ar->testmode.eventdata = NULL; + err_power_down: ath10k_hif_power_down(ar); @@ -341,6 +447,9 @@ static void __ath10k_tm_cmd_utf_stop(struct ath10k *ar) release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; + kfree(ar->testmode.eventdata); + ar->testmode.eventdata = NULL; + ar->state = ATH10K_STATE_OFF; } @@ -424,6 +533,85 @@ static int ath10k_tm_cmd_wmi(struct ath10k *ar, struct nlattr *tb[]) return ret; } +static int ath10k_tm_cmd_tlv(struct ath10k *ar, struct nlattr *tb[]) +{ + u16 total_bytes, num_segments; + u32 cmd_id, buf_len; + u8 segnumber = 0; + u8 *bufpos; + void *buf; + int ret; + + mutex_lock(&ar->conf_mutex); + + if (ar->state != ATH10K_STATE_UTF) { + ret = -ENETDOWN; + goto out; + } + + buf = nla_data(tb[ATH10K_TM_ATTR_DATA]); + buf_len = nla_len(tb[ATH10K_TM_ATTR_DATA]); + cmd_id = WMI_PDEV_UTF_CMDID; + + ath10k_dbg(ar, ATH10K_DBG_TESTMODE, + "cmd wmi ftm cmd_id %d buffer length %d\n", + cmd_id, buf_len); + ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", buf, buf_len); 
+ + bufpos = buf; + total_bytes = buf_len; + num_segments = total_bytes / MAX_WMI_UTF_LEN; + ar->testmode.expected_seq = 0; + + if (buf_len - (num_segments * MAX_WMI_UTF_LEN)) + num_segments++; + + while (buf_len) { + u16 chunk_len = min_t(u16, buf_len, MAX_WMI_UTF_LEN); + struct wmi_ftm_cmd *ftm_cmd; + struct sk_buff *skb; + u32 hdr_info; + u8 seginfo; + + skb = ath10k_wmi_alloc_skb(ar, (chunk_len + + sizeof(struct wmi_ftm_cmd))); + if (!skb) { + ret = -ENOMEM; + goto out; + } + + ftm_cmd = (struct wmi_ftm_cmd *)skb->data; + hdr_info = FIELD_PREP(WMI_TLV_TAG, WMI_TLV_TAG_ARRAY_BYTE) | + FIELD_PREP(WMI_TLV_LEN, (chunk_len + + sizeof(struct wmi_ftm_seg_hdr))); + ftm_cmd->tlv_header = __cpu_to_le32(hdr_info); + ftm_cmd->seg_hdr.len = __cpu_to_le32(total_bytes); + ftm_cmd->seg_hdr.msgref = __cpu_to_le32(ar->testmode.ftm_msgref); + seginfo = FIELD_PREP(ATH10K_FTM_SEGHDR_TOTAL_SEGMENTS, num_segments) | + FIELD_PREP(ATH10K_FTM_SEGHDR_CURRENT_SEQ, segnumber); + ftm_cmd->seg_hdr.segmentinfo = __cpu_to_le32(seginfo); + segnumber++; + + memcpy(&ftm_cmd->data, bufpos, chunk_len); + + ret = ath10k_wmi_cmd_send(ar, skb, cmd_id); + if (ret) { + ath10k_warn(ar, "failed to send wmi ftm command: %d\n", ret); + goto out; + } + + buf_len -= chunk_len; + bufpos += chunk_len; + } + + ar->testmode.ftm_msgref++; + ret = 0; + +out: + mutex_unlock(&ar->conf_mutex); + return ret; +} + int ath10k_tm_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void *data, int len) { @@ -439,9 +627,14 @@ int ath10k_tm_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!tb[ATH10K_TM_ATTR_CMD]) return -EINVAL; + ar->testmode.expected_seq = ATH10K_FTM_SEG_NONE; + switch (nla_get_u32(tb[ATH10K_TM_ATTR_CMD])) { case ATH10K_TM_CMD_GET_VERSION: - return ath10k_tm_cmd_get_version(ar, tb); + if (!tb[ATH10K_TM_ATTR_DATA]) + return ath10k_tm_cmd_get_version(ar, tb); + else /* ATH10K_TM_CMD_TLV */ + return ath10k_tm_cmd_tlv(ar, tb); case ATH10K_TM_CMD_UTF_START: return ath10k_tm_cmd_utf_start(ar, tb); 
case ATH10K_TM_CMD_UTF_STOP: diff --git a/drivers/net/wireless/ath/ath10k/testmode_i.h b/drivers/net/wireless/ath/ath10k/testmode_i.h index ee1cb27c1d600..1603f52766825 100644 --- a/drivers/net/wireless/ath/ath10k/testmode_i.h +++ b/drivers/net/wireless/ath/ath10k/testmode_i.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: ISC */ /* * Copyright (c) 2014,2017 Qualcomm Atheros, Inc. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ /* "API" level of the ath10k testmode interface. Bump it after every @@ -14,6 +15,7 @@ #define ATH10K_TESTMODE_VERSION_MINOR 0 #define ATH10K_TM_DATA_MAX_LEN 5000 +#define ATH_FTM_EVENT_MAX_BUF_LENGTH 2048 enum ath10k_tm_attr { __ATH10K_TM_ATTR_INVALID = 0, @@ -57,4 +59,17 @@ enum ath10k_tm_cmd { * ATH10K_TM_ATTR_DATA. */ ATH10K_TM_CMD_WMI = 3, + + /* The command used to transmit a test command to the firmware + * and the event to receive test events from the firmware. The data + * received only contain the TLV payload, need to add the tlv header + * and send the cmd to firmware with command id WMI_PDEV_UTF_CMDID. + * The data payload size could be large and the driver needs to + * send segmented data to firmware. + * + * This legacy testmode command shares the same value as the get-version + * command. To distinguish between them, we check whether the data attribute + * is present. + */ + ATH10K_TM_CMD_TLV = ATH10K_TM_CMD_GET_VERSION, }; diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 0faefc0a9a405..7f50a1de6b978 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -3,7 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #ifndef _WMI_H_ @@ -7418,6 +7418,23 @@ struct wmi_pdev_bb_timing_cfg_cmd { __le32 bb_xpa_timing; } __packed; +struct wmi_ftm_seg_hdr { + __le32 len; + __le32 msgref; + __le32 segmentinfo; + __le32 pdev_id; +} __packed; + +struct wmi_ftm_cmd { + __le32 tlv_header; + struct wmi_ftm_seg_hdr seg_hdr; + u8 data[]; +} __packed; + +#define WMI_TLV_LEN GENMASK(15, 0) +#define WMI_TLV_TAG GENMASK(31, 16) +#define MAX_WMI_UTF_LEN 252 + struct ath10k; struct ath10k_vif; struct ath10k_fw_stats_pdev; From 26ab9830beabda863766be4a79dc590c7645f4d9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Oct 2025 08:26:49 +0100 Subject: [PATCH 190/867] net: stmmac: replace has_xxxx with core_type Replace the has_gmac, has_gmac4 and has_xgmac ints, of which only one can be set when matching a core to its driver backend, with an enumerated type carrying the DWMAC core type. Tested-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Acked-by: Chen-Yu Tsai Reviewed-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/E1vB6ld-0000000BIPy-2Qi4@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 5 ++ .../stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 5 +- .../ethernet/stmicro/stmmac/dwmac-ipq806x.c | 2 +- .../ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +- .../ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 2 +- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +- .../net/ethernet/stmicro/stmmac/dwmac-s32.c | 2 +- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-tegra.c | 2 +- drivers/net/ethernet/stmicro/stmmac/hwif.c | 73 +++++++------------ .../net/ethernet/stmicro/stmmac/stmmac_est.c | 4 +- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 13 ++-- 
.../net/ethernet/stmicro/stmmac/stmmac_main.c | 34 +++++---- .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 14 ++-- .../net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +- .../ethernet/stmicro/stmmac/stmmac_platform.c | 9 +-- .../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 4 +- include/linux/stmmac.h | 11 ++- 21 files changed, 94 insertions(+), 104 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index fee7021246b1a..31254ba525d56 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -43,6 +43,11 @@ #define DWXGMAC_ID 0x76 #define DWXLGMAC_ID 0x27 +static inline bool dwmac_is_xmac(enum dwmac_core_type core_type) +{ + return core_type == DWMAC_CORE_GMAC4 || core_type == DWMAC_CORE_XGMAC; +} + #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ /* TX and RX Descriptor Length, these need to be power of two. diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index f1c2e35badf72..c7cd6497d42df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -109,7 +109,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, } /* dwc-qos needs GMAC4, AAL, TSO and PMT */ - plat_dat->has_gmac4 = 1; + plat_dat->core_type = DWMAC_CORE_GMAC4; plat_dat->dma_cfg->aal = 1; plat_dat->flags |= STMMAC_FLAG_TSO_EN; plat_dat->pmt = 1; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index e74d00984b889..b2194e414ec1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -565,7 +565,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) { /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ plat->clk_csr = STMMAC_CSR_20_35M; - plat->has_gmac = 
1; + plat->core_type = DWMAC_CORE_GMAC; plat->force_sf_dma_mode = 1; plat->mdio_bus_data->needs_reset = true; @@ -612,8 +612,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->pdev = pdev; plat->phy_addr = -1; plat->clk_csr = STMMAC_CSR_250_300M; - plat->has_gmac = 0; - plat->has_gmac4 = 1; + plat->core_type = DWMAC_CORE_GMAC4; plat->force_sf_dma_mode = 0; plat->flags |= (STMMAC_FLAG_TSO_EN | STMMAC_FLAG_SPH_DISABLE); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index ca4035cbb55b6..c05f85534f0ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -473,7 +473,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) return err; } - plat_dat->has_gmac = true; + plat_dat->core_type = DWMAC_CORE_GMAC; plat_dat->bsp_priv = gmac; plat_dat->set_clk_tx_rate = ipq806x_gmac_set_clk_tx_rate; plat_dat->multicast_filter_bins = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 592aa9d636e50..2a3ac0136cdbc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -92,7 +92,7 @@ static void loongson_default_data(struct pci_dev *pdev, /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ plat->clk_csr = STMMAC_CSR_20_35M; - plat->has_gmac = 1; + plat->core_type = DWMAC_CORE_GMAC; plat->force_sf_dma_mode = 1; /* Set default value for multicast hash bins */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 2562a6d036a2a..6fffc9dfbae55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -41,7 +41,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - plat_dat->has_gmac 
= true; + plat_dat->core_type = DWMAC_CORE_GMAC; reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); if (IS_ERR(reg)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 32244217d9526..d1e48b524d7a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -846,7 +846,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config; - plat_dat->has_gmac4 = 1; + plat_dat->core_type = DWMAC_CORE_GMAC4; if (ethqos->has_emac_ge_3) plat_dat->dwmac4_addrs = &data->dwmac4_addrs; plat_dat->pmt = 1; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 51ea0caf16c11..9b92f4d335cca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1750,8 +1750,8 @@ static int rk_gmac_probe(struct platform_device *pdev) /* If the stmmac is not already selected as gmac4, * then make sure we fallback to gmac. 
*/ - if (!plat_dat->has_gmac4) { - plat_dat->has_gmac = true; + if (plat_dat->core_type != DWMAC_CORE_GMAC4) { + plat_dat->core_type = DWMAC_CORE_GMAC; plat_dat->rx_fifo_size = 4096; plat_dat->tx_fifo_size = 2048; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c index 221539d760bc8..ee095ac132037 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -146,7 +146,7 @@ static int s32_dwmac_probe(struct platform_device *pdev) gmac->ioaddr = res.addr; /* S32CC core feature set */ - plat->has_gmac4 = true; + plat->core_type = DWMAC_CORE_GMAC4; plat->pmt = 1; plat->flags |= STMMAC_FLAG_SPH_DISABLE; plat->rx_fifo_size = 20480; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 354f01184e6cc..2ff5db6d41ca0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -497,7 +497,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->pcs_init = socfpga_dwmac_pcs_init; plat_dat->pcs_exit = socfpga_dwmac_pcs_exit; plat_dat->select_pcs = socfpga_dwmac_select_pcs; - plat_dat->has_gmac = true; + plat_dat->core_type = DWMAC_CORE_GMAC; plat_dat->riwt_off = 1; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 1eadcf5d1ad63..7f560d78209d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -136,7 +136,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev) /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. 
*/ plat_dat->tx_coe = 1; - plat_dat->has_gmac = true; + plat_dat->core_type = DWMAC_CORE_GMAC; plat_dat->bsp_priv = gmac; plat_dat->init = sun7i_gmac_init; plat_dat->exit = sun7i_gmac_exit; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index dc903b846b1bf..d765acbe37548 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -308,7 +308,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev) goto disable_clks; } - plat->has_xgmac = 1; + plat->core_type = DWMAC_CORE_XGMAC; plat->flags |= STMMAC_FLAG_TSO_EN; plat->pmt = 1; plat->bsp_priv = mgbe; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 3f7c765dcb797..00083ce525492 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -106,9 +106,7 @@ int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr) } static const struct stmmac_hwif_entry { - bool gmac; - bool gmac4; - bool xgmac; + enum dwmac_core_type core_type; u32 min_id; u32 dev_id; const struct stmmac_regs_off regs; @@ -127,9 +125,7 @@ static const struct stmmac_hwif_entry { } stmmac_hw[] = { /* NOTE: New HW versions shall go to the end of this table */ { - .gmac = false, - .gmac4 = false, - .xgmac = false, + .core_type = DWMAC_CORE_MAC100, .min_id = 0, .regs = { .ptp_off = PTP_GMAC3_X_OFFSET, @@ -146,9 +142,7 @@ static const struct stmmac_hwif_entry { .setup = dwmac100_setup, .quirks = stmmac_dwmac1_quirks, }, { - .gmac = true, - .gmac4 = false, - .xgmac = false, + .core_type = DWMAC_CORE_GMAC, .min_id = 0, .regs = { .ptp_off = PTP_GMAC3_X_OFFSET, @@ -165,9 +159,7 @@ static const struct stmmac_hwif_entry { .setup = dwmac1000_setup, .quirks = stmmac_dwmac1_quirks, }, { - .gmac = false, - .gmac4 = true, - .xgmac = false, + .core_type = DWMAC_CORE_GMAC4, .min_id = 0, .regs = { .ptp_off = PTP_GMAC4_OFFSET, @@ 
-187,9 +179,7 @@ static const struct stmmac_hwif_entry { .setup = dwmac4_setup, .quirks = stmmac_dwmac4_quirks, }, { - .gmac = false, - .gmac4 = true, - .xgmac = false, + .core_type = DWMAC_CORE_GMAC4, .min_id = DWMAC_CORE_4_00, .regs = { .ptp_off = PTP_GMAC4_OFFSET, @@ -210,9 +200,7 @@ static const struct stmmac_hwif_entry { .setup = dwmac4_setup, .quirks = NULL, }, { - .gmac = false, - .gmac4 = true, - .xgmac = false, + .core_type = DWMAC_CORE_GMAC4, .min_id = DWMAC_CORE_4_10, .regs = { .ptp_off = PTP_GMAC4_OFFSET, @@ -233,9 +221,7 @@ static const struct stmmac_hwif_entry { .setup = dwmac4_setup, .quirks = NULL, }, { - .gmac = false, - .gmac4 = true, - .xgmac = false, + .core_type = DWMAC_CORE_GMAC4, .min_id = DWMAC_CORE_5_10, .regs = { .ptp_off = PTP_GMAC4_OFFSET, @@ -256,9 +242,7 @@ static const struct stmmac_hwif_entry { .setup = dwmac4_setup, .quirks = NULL, }, { - .gmac = false, - .gmac4 = false, - .xgmac = true, + .core_type = DWMAC_CORE_XGMAC, .min_id = DWXGMAC_CORE_2_10, .dev_id = DWXGMAC_ID, .regs = { @@ -280,9 +264,7 @@ static const struct stmmac_hwif_entry { .setup = dwxgmac2_setup, .quirks = NULL, }, { - .gmac = false, - .gmac4 = false, - .xgmac = true, + .core_type = DWMAC_CORE_XGMAC, .min_id = DWXLGMAC_CORE_2_00, .dev_id = DWXLGMAC_ID, .regs = { @@ -308,20 +290,18 @@ static const struct stmmac_hwif_entry { int stmmac_hwif_init(struct stmmac_priv *priv) { - bool needs_xgmac = priv->plat->has_xgmac; - bool needs_gmac4 = priv->plat->has_gmac4; - bool needs_gmac = priv->plat->has_gmac; + enum dwmac_core_type core_type = priv->plat->core_type; const struct stmmac_hwif_entry *entry; struct mac_device_info *mac; bool needs_setup = true; u32 id, dev_id = 0; int i, ret; - if (needs_gmac) { + if (core_type == DWMAC_CORE_GMAC) { id = stmmac_get_id(priv, GMAC_VERSION); - } else if (needs_gmac4 || needs_xgmac) { + } else if (dwmac_is_xmac(core_type)) { id = stmmac_get_id(priv, GMAC4_VERSION); - if (needs_xgmac) + if (core_type == DWMAC_CORE_XGMAC) dev_id = 
stmmac_get_dev_id(priv, GMAC4_VERSION); } else { id = 0; @@ -331,14 +311,16 @@ int stmmac_hwif_init(struct stmmac_priv *priv) priv->synopsys_id = id; /* Lets assume some safe values first */ - priv->ptpaddr = priv->ioaddr + - (needs_gmac4 ? PTP_GMAC4_OFFSET : PTP_GMAC3_X_OFFSET); - priv->mmcaddr = priv->ioaddr + - (needs_gmac4 ? MMC_GMAC4_OFFSET : MMC_GMAC3_X_OFFSET); - if (needs_gmac4) + if (core_type == DWMAC_CORE_GMAC4) { + priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET; + priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET; priv->estaddr = priv->ioaddr + EST_GMAC4_OFFSET; - else if (needs_xgmac) - priv->estaddr = priv->ioaddr + EST_XGMAC_OFFSET; + } else { + priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET; + priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET; + if (core_type == DWMAC_CORE_XGMAC) + priv->estaddr = priv->ioaddr + EST_XGMAC_OFFSET; + } /* Check for HW specific setup first */ if (priv->plat->setup) { @@ -355,16 +337,12 @@ int stmmac_hwif_init(struct stmmac_priv *priv) for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { entry = &stmmac_hw[i]; - if (needs_gmac ^ entry->gmac) - continue; - if (needs_gmac4 ^ entry->gmac4) - continue; - if (needs_xgmac ^ entry->xgmac) + if (core_type != entry->core_type) continue; /* Use synopsys_id var because some setups can override this */ if (priv->synopsys_id < entry->min_id) continue; - if (needs_xgmac && (dev_id ^ entry->dev_id)) + if (core_type == DWMAC_CORE_XGMAC && (dev_id ^ entry->dev_id)) continue; /* Only use generic HW helpers if needed */ @@ -400,6 +378,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv) } dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", - id, needs_gmac, needs_gmac4); + id, core_type == DWMAC_CORE_GMAC, + core_type == DWMAC_CORE_GMAC4); return -EINVAL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c index 4b513d27a9889..afc516059b897 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c @@ -53,7 +53,7 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg, } ctrl = readl(est_addr + EST_CONTROL); - if (priv->plat->has_xgmac) { + if (priv->plat->core_type == DWMAC_CORE_XGMAC) { ctrl &= ~EST_XGMAC_PTOV; ctrl |= ((NSEC_PER_SEC / ptp_rate) * EST_XGMAC_PTOV_MUL) << EST_XGMAC_PTOV_SHIFT; @@ -148,7 +148,7 @@ static void est_irq_status(struct stmmac_priv *priv, struct net_device *dev, } if (status & EST_BTRE) { - if (priv->plat->has_xgmac) { + if (priv->plat->core_type == DWMAC_CORE_XGMAC) { btrl = FIELD_GET(EST_XGMAC_BTRL, status); btrl_max = FIELD_MAX(EST_XGMAC_BTRL); } else { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c60cd948311ea..df016c4eb7104 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -303,9 +303,10 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (priv->plat->has_gmac || priv->plat->has_gmac4) + if (priv->plat->core_type == DWMAC_CORE_GMAC || + priv->plat->core_type == DWMAC_CORE_GMAC4) strscpy(info->driver, GMAC_ETHTOOL_NAME, sizeof(info->driver)); - else if (priv->plat->has_xgmac) + else if (priv->plat->core_type == DWMAC_CORE_XGMAC) strscpy(info->driver, XGMAC_ETHTOOL_NAME, sizeof(info->driver)); else strscpy(info->driver, MAC100_ETHTOOL_NAME, @@ -351,9 +352,9 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - if (priv->plat->has_xgmac) + if (priv->plat->core_type == DWMAC_CORE_XGMAC) return XGMAC_REGSIZE * 4; - else if (priv->plat->has_gmac4) + else if (priv->plat->core_type == DWMAC_CORE_GMAC4) return GMAC4_REG_SPACE_SIZE; return REG_SPACE_SIZE; } @@ -368,12 +369,12 @@ static void stmmac_ethtool_gregs(struct net_device *dev, stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space); 
/* Copy DMA registers to where ethtool expects them */ - if (priv->plat->has_gmac4) { + if (priv->plat->core_type == DWMAC_CORE_GMAC4) { /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */ memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[GMAC4_DMA_CHAN_BASE_ADDR / 4], NUM_DWMAC4_DMA_REGS * 4); - } else if (!priv->plat->has_xgmac) { + } else if (priv->plat->core_type != DWMAC_CORE_XGMAC) { memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5e6aaead58946..9fa3c221a0c3c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -446,7 +446,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, if (!priv->hwts_rx_en) return; /* For GMAC4, the valid timestamp is from CTX next desc. */ - if (priv->plat->has_gmac4 || priv->plat->has_xgmac) + if (dwmac_is_xmac(priv->plat->core_type)) desc = np; /* Check if timestamp is available */ @@ -697,7 +697,7 @@ static int stmmac_hwtstamp_get(struct net_device *dev, static int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) { - bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + bool xmac = dwmac_is_xmac(priv->plat->core_type); struct timespec64 now; u32 sec_inc = 0; u64 temp = 0; @@ -746,7 +746,7 @@ static int stmmac_init_tstamp_counter(struct stmmac_priv *priv, */ static int stmmac_init_timestamping(struct stmmac_priv *priv) { - bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + bool xmac = dwmac_is_xmac(priv->plat->core_type); int ret; if (priv->plat->ptp_clk_freq_config) @@ -2413,7 +2413,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) txfifosz = priv->dma_cap.tx_fifo_size; /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ - if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { + if
(dwmac_is_xmac(priv->plat->core_type)) { rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; } @@ -4520,7 +4520,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_is_gso(skb) && priv->tso) { if (gso & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) return stmmac_tso_xmit(skb, dev); - if (priv->plat->has_gmac4 && (gso & SKB_GSO_UDP_L4)) + if (priv->plat->core_type == DWMAC_CORE_GMAC4 && + (gso & SKB_GSO_UDP_L4)) return stmmac_tso_xmit(skb, dev); } @@ -5973,7 +5974,7 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv) u32 queue; bool xmac; - xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + xmac = dwmac_is_xmac(priv->plat->core_type); queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt; if (priv->irq_wake) @@ -5987,7 +5988,7 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv) stmmac_fpe_irq_status(priv); /* To handle GMAC own interrupts */ - if ((priv->plat->has_gmac) || xmac) { + if (priv->plat->core_type == DWMAC_CORE_GMAC || xmac) { int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats); if (unlikely(status)) { @@ -6348,7 +6349,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) (priv->dma_cap.mbps_1000) ? "Y" : "N"); seq_printf(seq, "\tHalf duplex: %s\n", (priv->dma_cap.half_duplex) ? "Y" : "N"); - if (priv->plat->has_xgmac) { + if (priv->plat->core_type == DWMAC_CORE_XGMAC) { seq_printf(seq, "\tNumber of Additional MAC address registers: %d\n", priv->dma_cap.multi_addr); @@ -6372,7 +6373,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) (priv->dma_cap.time_stamp) ? "Y" : "N"); seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp: %s\n", (priv->dma_cap.atime_stamp) ? 
"Y" : "N"); - if (priv->plat->has_xgmac) + if (priv->plat->core_type == DWMAC_CORE_XGMAC) seq_printf(seq, "\tTimestamp System Time Source: %s\n", dwxgmac_timestamp_source[priv->dma_cap.tssrc]); seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE): %s\n", @@ -6381,7 +6382,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) seq_printf(seq, "\tChecksum Offload in TX: %s\n", (priv->dma_cap.tx_coe) ? "Y" : "N"); if (priv->synopsys_id >= DWMAC_CORE_4_00 || - priv->plat->has_xgmac) { + priv->plat->core_type == DWMAC_CORE_XGMAC) { seq_printf(seq, "\tIP Checksum Offload in RX: %s\n", (priv->dma_cap.rx_coe) ? "Y" : "N"); } else { @@ -7233,8 +7234,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv) * has to be disable and this can be done by passing the * riwt_off field from the platform. */ - if (((priv->synopsys_id >= DWMAC_CORE_3_50) || - (priv->plat->has_xgmac)) && (!priv->plat->riwt_off)) { + if ((priv->synopsys_id >= DWMAC_CORE_3_50 || + priv->plat->core_type == DWMAC_CORE_XGMAC) && + !priv->plat->riwt_off) { priv->use_riwt = 1; dev_info(priv->device, "Enable RX Mitigation via HW Watchdog Timer\n"); @@ -7355,7 +7357,7 @@ static int stmmac_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) return -ENODATA; /* For GMAC4, the valid timestamp is from CTX next desc. 
*/ - if (priv->plat->has_gmac4 || priv->plat->has_xgmac) + if (dwmac_is_xmac(priv->plat->core_type)) desc_contains_ts = ndesc; /* Check if timestamp is available */ @@ -7511,7 +7513,7 @@ int stmmac_dvr_probe(struct device *device, if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) { ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; - if (priv->plat->has_gmac4) + if (priv->plat->core_type == DWMAC_CORE_GMAC4) ndev->hw_features |= NETIF_F_GSO_UDP_L4; priv->tso = true; dev_info(priv->device, "TSO feature enabled\n"); @@ -7564,7 +7566,7 @@ int stmmac_dvr_probe(struct device *device, #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX; - if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { + if (dwmac_is_xmac(priv->plat->core_type)) { ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; priv->hw->hw_vlan_en = true; } @@ -7595,7 +7597,7 @@ int stmmac_dvr_probe(struct device *device, /* MTU range: 46 - hw-specific max */ ndev->min_mtu = ETH_ZLEN - ETH_HLEN; - if (priv->plat->has_xgmac) + if (priv->plat->core_type == DWMAC_CORE_XGMAC) ndev->max_mtu = XGMAC_JUMBO_LEN; else if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00)) ndev->max_mtu = JUMBO_LEN; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 65db43e9c85e2..3f8cc3293964c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -301,7 +301,7 @@ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg) struct stmmac_priv *priv = netdev_priv(bus->priv); u32 cmd; - if (priv->plat->has_gmac4) + if (priv->plat->core_type == DWMAC_CORE_GMAC4) cmd = MII_GMAC4_READ; else cmd = 0; @@ -344,7 +344,7 @@ static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, struct stmmac_priv *priv = netdev_priv(bus->priv); u32 cmd; - 
if (priv->plat->has_gmac4) + if (priv->plat->core_type == DWMAC_CORE_GMAC4) cmd = MII_GMAC4_WRITE; else cmd = MII_ADDR_GWRITE; @@ -417,7 +417,7 @@ int stmmac_mdio_reset(struct mii_bus *bus) * on MDC, so perform a dummy mdio read. To be updated for GMAC4 * if needed. */ - if (!priv->plat->has_gmac4) + if (priv->plat->core_type != DWMAC_CORE_GMAC4) writel(0, priv->ioaddr + mii_address); #endif return 0; @@ -528,7 +528,7 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) value = 0; } - if (priv->plat->has_xgmac) { + if (priv->plat->core_type == DWMAC_CORE_XGMAC) { if (clk_rate > 400000000) value = 0x5; else if (clk_rate > 350000000) @@ -601,7 +601,7 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->name = "stmmac"; - if (priv->plat->has_xgmac) { + if (priv->plat->core_type == DWMAC_CORE_XGMAC) { new_bus->read = &stmmac_xgmac2_mdio_read_c22; new_bus->write = &stmmac_xgmac2_mdio_write_c22; new_bus->read_c45 = &stmmac_xgmac2_mdio_read_c45; @@ -622,7 +622,7 @@ int stmmac_mdio_register(struct net_device *ndev) } else { new_bus->read = &stmmac_mdio_read_c22; new_bus->write = &stmmac_mdio_write_c22; - if (priv->plat->has_gmac4) { + if (priv->plat->core_type == DWMAC_CORE_GMAC4) { new_bus->read_c45 = &stmmac_mdio_read_c45; new_bus->write_c45 = &stmmac_mdio_write_c45; } @@ -650,7 +650,7 @@ int stmmac_mdio_register(struct net_device *ndev) } /* Looks like we need a dummy read for XGMAC only and C45 PHYs */ - if (priv->plat->has_xgmac) + if (priv->plat->core_type == DWMAC_CORE_XGMAC) stmmac_xgmac2_mdio_read_c45(new_bus, 0, 0, 0); /* If fixed-link is set, skip PHY scanning */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 4e3aa611fda83..94b3a3b272706 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -23,7 +23,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) { /* clk_csr_i = 20-35MHz & MDC = 
clk_csr_i/16 */ plat->clk_csr = STMMAC_CSR_20_35M; - plat->has_gmac = 1; + plat->core_type = DWMAC_CORE_GMAC; plat->force_sf_dma_mode = 1; plat->mdio_bus_data->needs_reset = true; @@ -76,7 +76,7 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, int i; plat->clk_csr = STMMAC_CSR_250_300M; - plat->has_gmac4 = 1; + plat->core_type = DWMAC_CORE_GMAC4; plat->force_sf_dma_mode = 1; plat->flags |= STMMAC_FLAG_TSO_EN; plat->pmt = 1; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 27bcaae07a7f2..fbb92cc6ab598 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -552,12 +552,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) &pdev->dev, plat->unicast_filter_entries); plat->multicast_filter_bins = dwmac1000_validate_mcast_bins( &pdev->dev, plat->multicast_filter_bins); - plat->has_gmac = 1; + plat->core_type = DWMAC_CORE_GMAC; plat->pmt = 1; } if (of_device_is_compatible(np, "snps,dwmac-3.40a")) { - plat->has_gmac = 1; + plat->core_type = DWMAC_CORE_GMAC; plat->enh_desc = 1; plat->tx_coe = 1; plat->bugged_jumbo = 1; @@ -565,8 +565,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) } if (of_device_compatible_match(np, stmmac_gmac4_compats)) { - plat->has_gmac4 = 1; - plat->has_gmac = 0; + plat->core_type = DWMAC_CORE_GMAC4; plat->pmt = 1; if (of_property_read_bool(np, "snps,tso")) plat->flags |= STMMAC_FLAG_TSO_EN; @@ -580,7 +579,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) } if (of_device_is_compatible(np, "snps,dwxgmac")) { - plat->has_xgmac = 1; + plat->core_type = DWMAC_CORE_XGMAC; plat->pmt = 1; if (of_property_read_bool(np, "snps,tso")) plat->flags |= STMMAC_FLAG_TSO_EN; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 993ff4e87e557..3e30172fa1294 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -57,7 +57,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) bool xmac, est_rst = false; int ret; - xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + xmac = dwmac_is_xmac(priv->plat->core_type); if (delta < 0) { neg_adj = 1; @@ -344,7 +344,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv) /* Calculate the clock domain crossing (CDC) error if necessary */ priv->plat->cdc_error_adj = 0; - if (priv->plat->has_gmac4) + if (priv->plat->core_type == DWMAC_CORE_GMAC4) priv->plat->cdc_error_adj = (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate; /* Update the ptp clock parameters based on feature discovery, when diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 99022620457ac..151c81c560c8c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -171,6 +171,13 @@ struct dwmac4_addrs { u32 mtl_low_cred_offset; }; +enum dwmac_core_type { + DWMAC_CORE_MAC100, + DWMAC_CORE_GMAC, + DWMAC_CORE_GMAC4, + DWMAC_CORE_XGMAC, +}; + #define STMMAC_FLAG_SPH_DISABLE BIT(1) #define STMMAC_FLAG_USE_PHY_WOL BIT(2) #define STMMAC_FLAG_HAS_SUN8I BIT(3) @@ -186,6 +193,7 @@ struct dwmac4_addrs { #define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13) struct plat_stmmacenet_data { + enum dwmac_core_type core_type; int bus_id; int phy_addr; /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media @@ -219,7 +227,6 @@ struct plat_stmmacenet_data { struct stmmac_dma_cfg *dma_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; - int has_gmac; int enh_desc; int tx_coe; int rx_coe; @@ -282,10 +289,8 @@ struct plat_stmmacenet_data { struct reset_control *stmmac_rst; struct reset_control *stmmac_ahb_rst; struct stmmac_axi *axi; - int has_gmac4; int rss_en; int mac_port_sel_speed; - int has_xgmac; u8 vlan_fail_q; struct pci_dev *pdev; int int_snapshot_num; From 10e0378f05d2b89c4f88f04ec56b2a5b9a116648 Mon Sep 17 
00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 21 Oct 2025 12:54:10 +0100 Subject: [PATCH 191/867] net: spacemit: Avoid -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Use regular arrays instead of flexible-array members (they're not really needed in this case) in a couple of unions, and fix the following warnings: 1 drivers/net/ethernet/spacemit/k1_emac.c:122:42: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 1 drivers/net/ethernet/spacemit/k1_emac.c:122:32: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 1 drivers/net/ethernet/spacemit/k1_emac.c:121:42: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 1 drivers/net/ethernet/spacemit/k1_emac.c:121:32: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Simon Horman Acked-by: Vivian Wang Link: https://patch.msgid.link/aPd0YjO-oP60Lgvj@kspp Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/spacemit/k1_emac.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/spacemit/k1_emac.h b/drivers/net/ethernet/spacemit/k1_emac.h index 5a09e946a276f..577efe66573eb 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.h +++ b/drivers/net/ethernet/spacemit/k1_emac.h @@ -363,7 +363,7 @@ struct emac_desc { /* Keep stats in this order, index used for accessing hardware */ union emac_hw_tx_stats { - struct { + struct individual_tx_stats { u64 tx_ok_pkts; u64 tx_total_pkts; u64 tx_ok_bytes; @@ -378,11 +378,11 @@ union emac_hw_tx_stats { u64 tx_pause_pkts; } stats; - DECLARE_FLEX_ARRAY(u64, array); + u64 array[sizeof(struct individual_tx_stats) / sizeof(u64)]; }; union emac_hw_rx_stats { - struct { + struct individual_rx_stats { u64 rx_ok_pkts; u64 rx_total_pkts; u64 rx_crc_err_pkts; @@ -410,7 +410,7 @@ union emac_hw_rx_stats { u64 rx_truncate_fifo_full_pkts; } stats; - DECLARE_FLEX_ARRAY(u64, array); + u64 array[sizeof(struct individual_rx_stats) / sizeof(u64)]; }; #endif /* _K1_EMAC_H_ */ From 114573962a68a527835f2f1433a89bc2f9feac1b Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Oct 2025 19:46:26 +0800 Subject: [PATCH 192/867] net/sched: Remove unused inline helper qdisc_from_priv() Since commit fb38306ceb9e ("net/sched: Retire ATM qdisc"), this is not used and can be removed. 
Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20251021114626.3148894-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 8a75c73fc5558..c660ac8710831 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -25,11 +25,6 @@ struct qdisc_walker { const struct Qdisc * : (const void *)&q->privdata, \ struct Qdisc * : (void *)&q->privdata) -static inline struct Qdisc *qdisc_from_priv(void *priv) -{ - return container_of(priv, struct Qdisc, privdata); -} - /* Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth From d550d63d0082268a31e93a10c64cbc2476b98b24 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Tue, 21 Oct 2025 17:42:27 +0800 Subject: [PATCH 193/867] eth: fbnic: fix integer overflow warning in TLV_MAX_DATA definition The TLV_MAX_DATA macro calculates (PAGE_SIZE - 512) which can exceed the maximum value of a 16-bit unsigned integer on architectures with large page sizes, causing compiler warnings: drivers/net/ethernet/meta/fbnic/fbnic_tlv.h:83:24: warning: conversion from 'long unsigned int' to 'short unsigned int' changes value from '261632' to '65024' [-Woverflow] Fix this by explicitly masking the result to 16 bits using bitwise AND with 0xFFFF, ensuring the value fits within the expected data type while maintaining the intended behavior for normal page sizes. This preserves the existing functionality while eliminating the compiler warning and potential undefined behavior from integer truncation. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202510190832.3SQkTCHe-lkp@intel.com/ Signed-off-by: Pei Xiao Link: https://patch.msgid.link/182b9d0235d044d69d7a57c1296cc6f46e395beb.1761039651.git.xiaopei01@kylinos.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h index c34bf87eeec95..3508b46ebdd00 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h @@ -80,7 +80,7 @@ struct fbnic_tlv_index { enum fbnic_tlv_type type; }; -#define TLV_MAX_DATA (PAGE_SIZE - 512) +#define TLV_MAX_DATA ((PAGE_SIZE - 512) & 0xFFFF) #define FBNIC_TLV_ATTR_ID_UNKNOWN USHRT_MAX #define FBNIC_TLV_ATTR_STRING(id, len) { id, len, FBNIC_TLV_STRING } #define FBNIC_TLV_ATTR_FLAG(id) { id, 0, FBNIC_TLV_FLAG } From e0665df8c501829f09e6a2a7c06b9860209d3a70 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 20 Oct 2025 15:46:11 +0200 Subject: [PATCH 194/867] net: ti: icssg-prueth: Omit a variable reassignment in prueth_netdev_init() An error code was assigned to a variable and checked accordingly. This value was passed to a dev_err_probe() call in an if branch. This function is documented in the way that the same value is returned. Thus delete two redundant variable reassignments. The source code was transformed by using the Coccinelle software. 
Signed-off-by: Markus Elfring Link: https://patch.msgid.link/71f7daa3-d4f4-4753-aae8-67040fc8297d@web.de Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 3 +-- drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 1c1f4394ff1f2..57a7d1ceab088 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1250,8 +1250,7 @@ static int prueth_netdev_init(struct prueth *prueth, } else if (of_phy_is_fixed_link(eth_node)) { ret = of_phy_register_fixed_link(eth_node); if (ret) { - ret = dev_err_probe(prueth->dev, ret, - "failed to register fixed-link phy\n"); + dev_err_probe(prueth->dev, ret, "failed to register fixed-link phy\n"); goto free; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c index 2a8c8847a6bd0..7bb4f0d850cc7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c @@ -818,8 +818,7 @@ static int prueth_netdev_init(struct prueth *prueth, } else if (of_phy_is_fixed_link(eth_node)) { ret = of_phy_register_fixed_link(eth_node); if (ret) { - ret = dev_err_probe(prueth->dev, ret, - "failed to register fixed-link phy\n"); + dev_err_probe(prueth->dev, ret, "failed to register fixed-link phy\n"); goto free; } From 61b7ade9ba8c3b16867e25411b5f7cf1abe35879 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 21 Oct 2025 09:07:26 +0200 Subject: [PATCH 195/867] net: phy: micrel: Add support for non PTP SKUs for lan8814 The lan8814 has 4 different SKUs, and for 2 of these SKUs PTP is disabled. All these SKUs have the same value in registers 2 and 3, meaning that we can't differentiate them based on device ID. Therefore, check the SKU register and, based on its value, decide whether to create a PTP device.
Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251021070726.3690685-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/phy/micrel.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 5f2c7e5c314f5..a47e55c228155 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -101,6 +101,8 @@ #define LAN8814_CABLE_DIAG_VCT_DATA_MASK GENMASK(7, 0) #define LAN8814_PAIR_BIT_SHIFT 12 +#define LAN8814_SKUS 0xB + #define LAN8814_WIRE_PAIR_MASK 0xF /* Lan8814 general Interrupt control/status reg in GPHY specific block. */ @@ -367,6 +369,9 @@ #define LAN8842_REV_8832 0x8832 +#define LAN8814_REV_LAN8814 0x8814 +#define LAN8814_REV_LAN8818 0x8818 + struct kszphy_hw_stat { const char *string; u8 reg; @@ -449,6 +454,7 @@ struct kszphy_priv { bool rmii_ref_clk_sel; bool rmii_ref_clk_sel_val; bool clk_enable; + bool is_ptp_available; u64 stats[ARRAY_SIZE(kszphy_hw_stats)]; struct kszphy_phy_stats phy_stats; }; @@ -4126,6 +4132,17 @@ static int lan8804_config_intr(struct phy_device *phydev) return 0; } +/* Check if the PHY has 1588 support. There are multiple skus of the PHY and + * some of them support PTP while others don't support it. This function will + * return true is the sku supports it, otherwise will return false. 
+ */ +static bool lan8814_has_ptp(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + + return priv->is_ptp_available; +} + static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) { int ret = IRQ_NONE; @@ -4142,6 +4159,9 @@ static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) ret = IRQ_HANDLED; } + if (!lan8814_has_ptp(phydev)) + return ret; + while (true) { irq_status = lanphy_read_page_reg(phydev, LAN8814_PAGE_PORT_REGS, PTP_TSU_INT_STS); @@ -4203,6 +4223,9 @@ static void lan8814_ptp_init(struct phy_device *phydev) !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING)) return; + if (!lan8814_has_ptp(phydev)) + return; + lanphy_write_page_reg(phydev, LAN8814_PAGE_PORT_REGS, TSU_HARD_RESET, TSU_HARD_RESET_); @@ -4332,6 +4355,9 @@ static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name, static int lan8814_ptp_probe_once(struct phy_device *phydev) { + if (!lan8814_has_ptp(phydev)) + return 0; + return __lan8814_ptp_probe_once(phydev, "lan8814_ptp_pin", LAN8814_PTP_GPIO_NUM); } @@ -4446,6 +4472,18 @@ static int lan8814_probe(struct phy_device *phydev) devm_phy_package_join(&phydev->mdio.dev, phydev, addr, sizeof(struct lan8814_shared_priv)); + /* There are lan8814 SKUs that don't support PTP. Make sure that for + * those skus no PTP device is created. Here we check if the SKU + * supports PTP. 
+ */ + err = lanphy_read_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_SKUS); + if (err < 0) + return err; + + priv->is_ptp_available = err == LAN8814_REV_LAN8814 || + err == LAN8814_REV_LAN8818; + if (phy_package_init_once(phydev)) { err = lan8814_release_coma_mode(phydev); if (err) From 47d0cd6bccb4604192633cc8d29511e85d811fc0 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 17 Oct 2025 09:48:59 +0800 Subject: [PATCH 196/867] wifi: ath11k: fix VHT MCS assignment While associating, firmware needs to know peer's receive capability to calculate its own VHT transmit MCS, currently host sends this information to firmware via mcs->rx_mcs_set field, this is wrong as firmware actually takes it from mcs->tx_mcs_set field. Till now there is no failure seen due to this, most likely because almost all peers are advertising the same capability for both transmit and receive. Swap the assignment to fix it. Besides, rate control mask is meant to limit our own transmit MCS, hence need to go via mcs->tx_mcs_set field. With the aforementioned swapping done, change is needed as well to apply it to the peer's receive capability rather than transmit capability. 
Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251017-ath11k-mcs-assignment-v1-1-da40825c1783@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 4 ++-- drivers/net/wireless/ath/ath11k/wmi.c | 13 ++++++++----- drivers/net/wireless/ath/ath11k/wmi.h | 2 ++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 106e2530b64e9..367ca0c90167b 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2235,9 +2235,9 @@ static void ath11k_peer_assoc_h_vht(struct ath11k *ar, arg->peer_nss = min(sta->deflink.rx_nss, max_nss); arg->rx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.rx_highest); arg->rx_mcs_set = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + arg->rx_mcs_set = ath11k_peer_assoc_h_vht_limit(arg->rx_mcs_set, vht_mcs_mask); arg->tx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.tx_highest); - arg->tx_mcs_set = ath11k_peer_assoc_h_vht_limit( - __le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map), vht_mcs_mask); + arg->tx_mcs_set = __le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); /* In IPQ8074 platform, VHT mcs rate 10 and 11 is enabled by default. * VHT mcs rate 10 and 11 is not supported in 11ac standard. diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 0491e3fd6b5e1..942dfeb8b1af8 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include #include @@ -2061,10 +2061,13 @@ int ath11k_wmi_send_peer_assoc_cmd(struct ath11k *ar, cmd->peer_bw_rxnss_override |= param->peer_bw_rxnss_override; if (param->vht_capable) { - mcs->rx_max_rate = param->rx_max_rate; - mcs->rx_mcs_set = param->rx_mcs_set; - mcs->tx_max_rate = param->tx_max_rate; - mcs->tx_mcs_set = param->tx_mcs_set; + /* firmware interprets mcs->tx_mcs_set field as peer's + * RX capability + */ + mcs->tx_max_rate = param->rx_max_rate; + mcs->tx_mcs_set = param->rx_mcs_set; + mcs->rx_max_rate = param->tx_max_rate; + mcs->rx_mcs_set = param->tx_mcs_set; } /* HE Rates */ diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 3c4885a12855b..0f0de24a38408 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -4119,8 +4119,10 @@ struct wmi_rate_set { struct wmi_vht_rate_set { u32 tlv_header; u32 rx_max_rate; + /* MCS at which the peer can transmit */ u32 rx_mcs_set; u32 tx_max_rate; + /* MCS at which the peer can receive */ u32 tx_mcs_set; u32 tx_max_mcs_nss; } __packed; From 4a013ca2d490c73c40588d62712ffaa432046a04 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 17 Oct 2025 09:49:00 +0800 Subject: [PATCH 197/867] wifi: ath11k: fix peer HE MCS assignment In ath11k_wmi_send_peer_assoc_cmd(), peer's transmit MCS is sent to firmware as receive MCS while peer's receive MCS is sent as transmit MCS, which goes against firmware's definition. While connecting to a misbehaved AP that advertises 0xffff (meaning not supported) for 160 MHz transmit MCS map, firmware crashes because 0xffff is assigned to he_mcs->rx_mcs_set field. Ext Tag: HE Capabilities [...] Supported HE-MCS and NSS Set [...] Rx and Tx MCS Maps 160 MHz [...] Tx HE-MCS Map 160 MHz: 0xffff Swap the assignment to fix this issue.
As the HE rate control mask is meant to limit our own transmit MCS, it needs to go via he_mcs->rx_mcs_set field. With the aforementioned swapping done, change is needed as well to apply it to the peer's receive MCS. Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: 61fe43e7216d ("ath11k: add support for setting fixed HE rate/gi/ltf") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251017-ath11k-mcs-assignment-v1-2-da40825c1783@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 4 ++-- drivers/net/wireless/ath/ath11k/wmi.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 367ca0c90167b..ebde4208791ce 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2522,10 +2522,10 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, he_tx_mcs = v; } v = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); + v = ath11k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_rx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_160] = v; v = le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_160); - v = ath11k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_tx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_160] = v; arg->peer_he_mcs_count++; @@ -2535,10 +2535,10 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar, default: v = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); + v = ath11k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_rx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_80] = v; v = le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80); - v = ath11k_peer_assoc_h_he_limit(v, he_mcs_mask); arg->peer_he_tx_mcs_set[WMI_HECAP_TXRX_MCS_NSS_IDX_80] = v; arg->peer_he_mcs_count++; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c 
b/drivers/net/wireless/ath/ath11k/wmi.c index 942dfeb8b1af8..edff6fb613449 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -2091,8 +2091,11 @@ int ath11k_wmi_send_peer_assoc_cmd(struct ath11k *ar, FIELD_PREP(WMI_TLV_LEN, sizeof(*he_mcs) - TLV_HDR_SIZE); - he_mcs->rx_mcs_set = param->peer_he_tx_mcs_set[i]; - he_mcs->tx_mcs_set = param->peer_he_rx_mcs_set[i]; + /* firmware interprets mcs->rx_mcs_set field as peer's + * RX capability + */ + he_mcs->rx_mcs_set = param->peer_he_rx_mcs_set[i]; + he_mcs->tx_mcs_set = param->peer_he_tx_mcs_set[i]; ptr += sizeof(*he_mcs); } From 66887282233d281cd9109dabfdad5d86b709acc0 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 17 Oct 2025 09:37:57 +0530 Subject: [PATCH 198/867] wifi: ath11k: relocate some Tx power related functions in mac.c A forthcoming change necessitates that these functions be defined prior to their usage. Therefore, relocate them now as a preparatory step for the upcoming modifications. Compile tested only. 
Signed-off-by: Aditya Kumar Singh Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251017-add_tx_power_insertion_support-v1-1-f08feacfca93@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 270 +++++++++++++------------- 1 file changed, 135 insertions(+), 135 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index ebde4208791ce..99dea865e31ea 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4028,6 +4028,141 @@ static int ath11k_start_scan(struct ath11k *ar, return 0; } +static void ath11k_mac_fw_stats_reset(struct ath11k *ar) +{ + spin_lock_bh(&ar->data_lock); + ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; + spin_unlock_bh(&ar->data_lock); +} + +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param) +{ + struct ath11k_base *ab = ar->ab; + unsigned long time_left; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + ath11k_mac_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); + + ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); + + if (ret) { + ath11k_warn(ab, "could not request fw stats (%d)\n", + ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + if (!time_left) + return -ETIMEDOUT; + + /* FW stats can get split when exceeding the stats data buffer limit. 
+ * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + +static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, + u32 vdev_id, u32 stats_id) +{ + struct ath11k_base *ab = ar->ab; + struct stats_request_params req_param; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + if (ar->state != ATH11K_STATE_ON) + return -ENETDOWN; + + req_param.pdev_id = pdev_id; + req_param.vdev_id = vdev_id; + req_param.stats_id = stats_id; + + ret = ath11k_mac_fw_stats_request(ar, &req_param); + if (ret) + ath11k_warn(ab, "failed to request fw stats: %d\n", ret); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", + pdev_id, vdev_id, stats_id); + + return ret; +} + +static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + unsigned int link_id, + int *dbm) +{ + struct ath11k *ar = hw->priv; + struct ath11k_base *ab = ar->ab; + struct ath11k_fw_stats_pdev *pdev; + int ret; + + /* Final Tx power is minimum of Target Power, CTL power, Regulatory + * Power, PSD EIRP Power. We just know the Regulatory power from the + * regulatory rules obtained. FW knows all these power and sets the min + * of these. Hence, we request the FW pdev stats in which FW reports + * the minimum of all vdev's channel Tx power. + */ + mutex_lock(&ar->conf_mutex); + + /* Firmware doesn't provide Tx power during CAC hence no need to fetch + * the stats. 
+ */ + if (test_bit(ATH11K_CAC_RUNNING, &ar->dev_flags)) { + mutex_unlock(&ar->conf_mutex); + return -EAGAIN; + } + + ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_PDEV_STAT); + if (ret) { + ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); + goto err_fallback; + } + + spin_lock_bh(&ar->data_lock); + pdev = list_first_entry_or_null(&ar->fw_stats.pdevs, + struct ath11k_fw_stats_pdev, list); + if (!pdev) { + spin_unlock_bh(&ar->data_lock); + goto err_fallback; + } + + /* tx power is set as 2 units per dBm in FW. */ + *dbm = pdev->chan_tx_power / 2; + + spin_unlock_bh(&ar->data_lock); + mutex_unlock(&ar->conf_mutex); + + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "txpower from firmware %d, reported %d dBm\n", + pdev->chan_tx_power, *dbm); + return 0; + +err_fallback: + mutex_unlock(&ar->conf_mutex); + /* We didn't get txpower from FW. Hence, relying on vif->bss_conf.txpower */ + *dbm = vif->bss_conf.txpower; + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "txpower from firmware NaN, reported %d dBm\n", + *dbm); + return 0; +} + static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req) @@ -9079,81 +9214,6 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo, } } -static void ath11k_mac_fw_stats_reset(struct ath11k *ar) -{ - spin_lock_bh(&ar->data_lock); - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - ar->fw_stats.num_vdev_recvd = 0; - ar->fw_stats.num_bcn_recvd = 0; - spin_unlock_bh(&ar->data_lock); -} - -int ath11k_mac_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - ath11k_mac_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - reinit_completion(&ar->fw_stats_done); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - - if (ret) { - 
ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) - return -ETIMEDOUT; - - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - -static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - if (ar->state != ATH11K_STATE_ON) - return -ENETDOWN; - - req_param.pdev_id = pdev_id; - req_param.vdev_id = vdev_id; - req_param.stats_id = stats_id; - - ret = ath11k_mac_fw_stats_request(ar, &req_param); - if (ret) - ath11k_warn(ab, "failed to request fw stats: %d\n", ret); - - ath11k_dbg(ab, ATH11K_DBG_WMI, - "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - - return ret; -} - static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -9539,66 +9599,6 @@ static int ath11k_mac_op_remain_on_channel(struct ieee80211_hw *hw, return ret; } -static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - unsigned int link_id, - int *dbm) -{ - struct ath11k *ar = hw->priv; - struct ath11k_base *ab = ar->ab; - struct ath11k_fw_stats_pdev *pdev; - int ret; - - /* Final Tx power is minimum of Target Power, CTL power, Regulatory - * Power, PSD EIRP Power. We just know the Regulatory power from the - * regulatory rules obtained. FW knows all these power and sets the min - * of these. 
Hence, we request the FW pdev stats in which FW reports - * the minimum of all vdev's channel Tx power. - */ - mutex_lock(&ar->conf_mutex); - - /* Firmware doesn't provide Tx power during CAC hence no need to fetch - * the stats. - */ - if (test_bit(ATH11K_CAC_RUNNING, &ar->dev_flags)) { - mutex_unlock(&ar->conf_mutex); - return -EAGAIN; - } - - ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_PDEV_STAT); - if (ret) { - ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); - goto err_fallback; - } - - spin_lock_bh(&ar->data_lock); - pdev = list_first_entry_or_null(&ar->fw_stats.pdevs, - struct ath11k_fw_stats_pdev, list); - if (!pdev) { - spin_unlock_bh(&ar->data_lock); - goto err_fallback; - } - - /* tx power is set as 2 units per dBm in FW. */ - *dbm = pdev->chan_tx_power / 2; - - spin_unlock_bh(&ar->data_lock); - mutex_unlock(&ar->conf_mutex); - - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "txpower from firmware %d, reported %d dBm\n", - pdev->chan_tx_power, *dbm); - return 0; - -err_fallback: - mutex_unlock(&ar->conf_mutex); - /* We didn't get txpower from FW. Hence, relying on vif->bss_conf.txpower */ - *dbm = vif->bss_conf.txpower; - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "txpower from firmware NaN, reported %d dBm\n", - *dbm); - return 0; -} - static int ath11k_mac_station_add(struct ath11k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta) From c243d5e44f6ecbb29bf55b82e6dd92bca4fde0b1 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 17 Oct 2025 09:37:58 +0530 Subject: [PATCH 199/867] wifi: ath11k: wrap ath11k_mac_op_get_txpower() with lock-aware internal helper Refactor ath11k_mac_op_get_txpower() by introducing a new internal function ath11k_mac_handle_get_txpower(), which assumes the caller holds the appropriate lock. This prepares the codebase for future change where the internal function may be invoked directly with the lock already acquired, improving modularity and lock handling consistency. 
No functional change intended. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aditya Kumar Singh Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251017-add_tx_power_insertion_support-v1-2-f08feacfca93@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 31 +++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 99dea865e31ea..319b4cfeb368a 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4103,12 +4103,10 @@ static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, return ret; } -static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - unsigned int link_id, - int *dbm) +static int ath11k_mac_handle_get_txpower(struct ath11k *ar, + struct ieee80211_vif *vif, + int *dbm) { - struct ath11k *ar = hw->priv; struct ath11k_base *ab = ar->ab; struct ath11k_fw_stats_pdev *pdev; int ret; @@ -4119,15 +4117,13 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, * of these. Hence, we request the FW pdev stats in which FW reports * the minimum of all vdev's channel Tx power. */ - mutex_lock(&ar->conf_mutex); + lockdep_assert_held(&ar->conf_mutex); /* Firmware doesn't provide Tx power during CAC hence no need to fetch * the stats. 
*/ - if (test_bit(ATH11K_CAC_RUNNING, &ar->dev_flags)) { - mutex_unlock(&ar->conf_mutex); + if (test_bit(ATH11K_CAC_RUNNING, &ar->dev_flags)) return -EAGAIN; - } ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, WMI_REQUEST_PDEV_STAT); @@ -4148,14 +4144,12 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, *dbm = pdev->chan_tx_power / 2; spin_unlock_bh(&ar->data_lock); - mutex_unlock(&ar->conf_mutex); ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "txpower from firmware %d, reported %d dBm\n", pdev->chan_tx_power, *dbm); return 0; err_fallback: - mutex_unlock(&ar->conf_mutex); /* We didn't get txpower from FW. Hence, relying on vif->bss_conf.txpower */ *dbm = vif->bss_conf.txpower; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "txpower from firmware NaN, reported %d dBm\n", @@ -4163,6 +4157,21 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, return 0; } +static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + unsigned int link_id, + int *dbm) +{ + struct ath11k *ar = hw->priv; + int ret; + + mutex_lock(&ar->conf_mutex); + ret = ath11k_mac_handle_get_txpower(ar, vif, dbm); + mutex_unlock(&ar->conf_mutex); + + return ret; +} + static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req) From 722015690f52d046ae609e1b90cd3f018644d93d Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 17 Oct 2025 09:37:59 +0530 Subject: [PATCH 200/867] wifi: ath11k: add support for Tx Power insertion in RRM action frame For certain action frames like the TPC Report IE in the spectrum management TPC Report action frame, and in the Radio Measurement Link Measurement Report action frame there is a requirement to fill in the current and max Tx power of the device in the packet. Add support to populate these fields in the relevant packets. In software-encrypted cases such as PMF, skip insertion since the packets are already encrypted and cannot be modified. 
Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aditya Kumar Singh Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251017-add_tx_power_insertion_support-v1-3-f08feacfca93@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 166 ++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 319b4cfeb368a..d15e3b67e6259 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -6251,6 +6251,159 @@ static void ath11k_mgmt_over_wmi_tx_purge(struct ath11k *ar) ath11k_mgmt_over_wmi_tx_drop(ar, skb); } +static int ath11k_mac_mgmt_action_frame_fill_elem_data(struct ath11k_vif *arvif, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + u8 category, *buf, iv_len, action_code, dialog_token; + int cur_tx_power, max_tx_power; + struct ath11k *ar = arvif->ar; + struct cfg80211_chan_def def; + struct ath11k_skb_cb *skb_cb; + struct ieee80211_mgmt *mgmt; + unsigned int remaining_len; + bool has_protected; + + lockdep_assert_held(&ar->conf_mutex); + + /* make sure category field is present */ + if (skb->len < IEEE80211_MIN_ACTION_SIZE) + return -EINVAL; + + remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE; + has_protected = ieee80211_has_protected(hdr->frame_control); + + /* In case of SW crypto and hdr protected (PMF), packet will already be encrypted, + * we can't put in data in this case + */ + if (test_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ar->ab->dev_flags) && + has_protected) + return 0; + + mgmt = (struct ieee80211_mgmt *)hdr; + buf = (u8 *)&mgmt->u.action; + + /* FCTL_PROTECTED frame might have extra space added for HDR_LEN. 
Offset that + * many bytes if it is there + */ + if (has_protected) { + skb_cb = ATH11K_SKB_CB(skb); + + switch (skb_cb->cipher) { + /* Cipher suite having flag %IEEE80211_KEY_FLAG_GENERATE_IV_MGMT set in + * key needs to be processed. See ath11k_install_key() + */ + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + iv_len = IEEE80211_CCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_TKIP: + iv_len = 0; + break; + default: + return -EINVAL; + } + + if (remaining_len < iv_len) + return -EINVAL; + + buf += iv_len; + remaining_len -= iv_len; + } + + category = *buf++; + /* category code is already taken care in %IEEE80211_MIN_ACTION_SIZE hence + * no need to adjust remaining_len + */ + + switch (category) { + case WLAN_CATEGORY_RADIO_MEASUREMENT: + /* need action code and dialog token */ + if (remaining_len < 2) + return -EINVAL; + + /* Packet Format: + * Action Code | Dialog Token | Variable Len (based on Action Code) + */ + action_code = *buf++; + dialog_token = *buf++; + remaining_len -= 2; + + if (ath11k_mac_vif_chan(arvif->vif, &def)) + return -ENOENT; + + cur_tx_power = arvif->vif->bss_conf.txpower; + max_tx_power = min(def.chan->max_reg_power, (int)ar->max_tx_power / 2); + ath11k_mac_handle_get_txpower(ar, arvif->vif, &cur_tx_power); + + switch (action_code) { + case WLAN_RM_ACTION_LINK_MEASUREMENT_REQUEST: + /* need variable fields to be present in len */ + if (remaining_len < 2) + return -EINVAL; + + /* Variable length format as defined in IEEE 802.11-2024, + * Figure 9-1187-Link Measurement Request frame Action field + * format. + * Transmit Power | Max Tx Power + * We fill both of these. 
+ */ + *buf++ = cur_tx_power; + *buf = max_tx_power; + + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "RRM: Link Measurement Req dialog_token %u cur_tx_power %d max_tx_power %d\n", + dialog_token, cur_tx_power, max_tx_power); + break; + case WLAN_RM_ACTION_LINK_MEASUREMENT_REPORT: + /* need variable fields to be present in len */ + if (remaining_len < 3) + return -EINVAL; + + /* Variable length format as defined in IEEE 802.11-2024, + * Figure 9-1188-Link Measurement Report frame Action field format + * TPC Report | Variable Fields + * + * TPC Report Format: + * Element ID | Len | Tx Power | Link Margin + * + * We fill Tx power in the TPC Report (2nd index) + */ + buf[2] = cur_tx_power; + + /* TODO: At present, Link margin data is not present so can't + * really fill it now. Once it is available, it can be added + * here + */ + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "RRM: Link Measurement Report dialog_token %u cur_tx_power %d\n", + dialog_token, cur_tx_power); + break; + default: + return -EINVAL; + } + break; + default: + /* nothing to fill */ + return 0; + } + + return 0; +} + +static int ath11k_mac_mgmt_frame_fill_elem_data(struct ath11k_vif *arvif, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_action(hdr->frame_control)) + return 0; + + return ath11k_mac_mgmt_action_frame_fill_elem_data(arvif, skb); +} + static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work) { struct ath11k *ar = container_of(work, struct ath11k, wmi_mgmt_tx_work); @@ -6270,6 +6423,19 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work) arvif = ath11k_vif_to_arvif(skb_cb->vif); mutex_lock(&ar->conf_mutex); if (ar->allocated_vdev_map & (1LL << arvif->vdev_id)) { + /* Fill in the data which is required to be filled by the driver + * For example: Max Tx power in Link Measurement Request/Report + */ + ret = ath11k_mac_mgmt_frame_fill_elem_data(arvif, skb); + if (ret) { + /* If we couldn't fill the data due to any 
reason, + * let's not discard transmitting the packet. + */ + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "Failed to fill the required data for the mgmt packet err %d\n", + ret); + } + ret = ath11k_mac_mgmt_tx_wmi(ar, arvif, skb); if (ret) { ath11k_warn(ar->ab, "failed to tx mgmt frame, vdev_id %d :%d\n", From 998c68e96c03f10dec19b65279ade9d4000d1ae9 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 17 Oct 2025 09:38:00 +0530 Subject: [PATCH 201/867] wifi: ath11k: advertise NL80211_FEATURE_TX_POWER_INSERTION Now that driver is capable of inserting Tx power, advertise the support for the same to upper layers. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aditya Kumar Singh Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251017-add_tx_power_insertion_support-v1-4-f08feacfca93@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index d15e3b67e6259..c6360d6a72bd3 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -10543,6 +10543,8 @@ static int __ath11k_mac_register(struct ath11k *ar) ar->hw->wiphy->features |= NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_AP_SCAN; + ar->hw->wiphy->features |= NL80211_FEATURE_TX_POWER_INSERTION; + ar->max_num_stations = TARGET_NUM_STATIONS(ab); ar->max_num_peers = TARGET_NUM_PEERS_PDEV(ab); From 50cb7ccab8176cbce4e32420f9fc2d6d80e69a09 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 22 Oct 2025 08:24:46 -0700 Subject: [PATCH 202/867] wifi: ath11k: Correctly use "ab" macro parameter The checkpatch script is reporting multiple instances of: Argument 'x' is not used in function-like macro Fix these by renaming the argument to match the usage. 
In the process, also add parentheses to the usage to avoid MACRO_ARG_PRECEDENCE issues. Compile tested only. Link: https://patch.msgid.link/20251022-ath11k-bad-macro-arg-v1-1-93a8eadb6191@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/hal.h | 38 +++++++++++++-------------- drivers/net/wireless/ath/ath11k/pci.h | 18 ++++++------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h index 839095af9267e..82603a389bb92 100644 --- a/drivers/net/wireless/ath/ath11k/hal.h +++ b/drivers/net/wireless/ath/ath11k/hal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #ifndef ATH11K_HAL_H @@ -43,14 +43,14 @@ struct ath11k_base; #define HAL_SEQ_WCSS_UMAC_OFFSET 0x00a00000 #define HAL_SEQ_WCSS_UMAC_REO_REG 0x00a38000 #define HAL_SEQ_WCSS_UMAC_TCL_REG 0x00a44000 -#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(x) \ - (ab->hw_params.regs->hal_seq_wcss_umac_ce0_src_reg) -#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(x) \ - (ab->hw_params.regs->hal_seq_wcss_umac_ce0_dst_reg) -#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(x) \ - (ab->hw_params.regs->hal_seq_wcss_umac_ce1_src_reg) -#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(x) \ - (ab->hw_params.regs->hal_seq_wcss_umac_ce1_dst_reg) +#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) \ + ((ab)->hw_params.regs->hal_seq_wcss_umac_ce0_src_reg) +#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) \ + ((ab)->hw_params.regs->hal_seq_wcss_umac_ce0_dst_reg) +#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) \ + ((ab)->hw_params.regs->hal_seq_wcss_umac_ce1_src_reg) +#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) \ + ((ab)->hw_params.regs->hal_seq_wcss_umac_ce1_dst_reg) #define HAL_SEQ_WCSS_UMAC_WBM_REG 0x00a34000 #define
HAL_CE_WFSS_CE_REG_BASE 0x01b80000 @@ -209,10 +209,10 @@ struct ath11k_base; #define HAL_REO_STATUS_HP(ab) ab->hw_params.regs->hal_reo_status_hp /* WBM Idle R0 address */ -#define HAL_WBM_IDLE_LINK_RING_BASE_LSB(x) \ - (ab->hw_params.regs->hal_wbm_idle_link_ring_base_lsb) -#define HAL_WBM_IDLE_LINK_RING_MISC_ADDR(x) \ - (ab->hw_params.regs->hal_wbm_idle_link_ring_misc) +#define HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab) \ + ((ab)->hw_params.regs->hal_wbm_idle_link_ring_base_lsb) +#define HAL_WBM_IDLE_LINK_RING_MISC_ADDR(ab) \ + ((ab)->hw_params.regs->hal_wbm_idle_link_ring_misc) #define HAL_WBM_R0_IDLE_LIST_CONTROL_ADDR 0x00000048 #define HAL_WBM_R0_IDLE_LIST_SIZE_ADDR 0x0000004c #define HAL_WBM_SCATTERED_RING_BASE_LSB 0x00000058 @@ -227,17 +227,17 @@ struct ath11k_base; #define HAL_WBM_IDLE_LINK_RING_HP 0x000030b0 /* SW2WBM R0 release address */ -#define HAL_WBM_RELEASE_RING_BASE_LSB(x) \ - (ab->hw_params.regs->hal_wbm_release_ring_base_lsb) +#define HAL_WBM_RELEASE_RING_BASE_LSB(ab) \ + ((ab)->hw_params.regs->hal_wbm_release_ring_base_lsb) /* SW2WBM R2 release address */ #define HAL_WBM_RELEASE_RING_HP 0x00003018 /* WBM2SW R0 release address */ -#define HAL_WBM0_RELEASE_RING_BASE_LSB(x) \ - (ab->hw_params.regs->hal_wbm0_release_ring_base_lsb) -#define HAL_WBM1_RELEASE_RING_BASE_LSB(x) \ - (ab->hw_params.regs->hal_wbm1_release_ring_base_lsb) +#define HAL_WBM0_RELEASE_RING_BASE_LSB(ab) \ + ((ab)->hw_params.regs->hal_wbm0_release_ring_base_lsb) +#define HAL_WBM1_RELEASE_RING_BASE_LSB(ab) \ + ((ab)->hw_params.regs->hal_wbm1_release_ring_base_lsb) /* WBM2SW R2 release address */ #define HAL_WBM0_RELEASE_RING_HP 0x000030c0 diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h index c33c7865145cc..1e3005a4b64c6 100644 --- a/drivers/net/wireless/ath/ath11k/pci.h +++ b/drivers/net/wireless/ath/ath11k/pci.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2019-2020 The Linux Foundation. All rights reserved. 
- * Copyright (c) 2021-2022,2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #ifndef _ATH11K_PCI_H #define _ATH11K_PCI_H @@ -35,18 +35,18 @@ #define PCIE_SMLH_REQ_RST_LINK_DOWN 0x2 #define PCIE_INT_CLEAR_ALL 0xffffffff -#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG(x) \ - (ab->hw_params.regs->pcie_qserdes_sysclk_en_sel) +#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG(ab) \ + ((ab)->hw_params.regs->pcie_qserdes_sysclk_en_sel) #define PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL 0x10 #define PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK 0xffffffff -#define PCIE_PCS_OSC_DTCT_CONFIG1_REG(x) \ - (ab->hw_params.regs->pcie_pcs_osc_dtct_config_base) +#define PCIE_PCS_OSC_DTCT_CONFIG1_REG(ab) \ + ((ab)->hw_params.regs->pcie_pcs_osc_dtct_config_base) #define PCIE_PCS_OSC_DTCT_CONFIG1_VAL 0x02 -#define PCIE_PCS_OSC_DTCT_CONFIG2_REG(x) \ - (ab->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0x4) +#define PCIE_PCS_OSC_DTCT_CONFIG2_REG(ab) \ + ((ab)->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0x4) #define PCIE_PCS_OSC_DTCT_CONFIG2_VAL 0x52 -#define PCIE_PCS_OSC_DTCT_CONFIG4_REG(x) \ - (ab->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0xc) +#define PCIE_PCS_OSC_DTCT_CONFIG4_REG(ab) \ + ((ab)->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0xc) #define PCIE_PCS_OSC_DTCT_CONFIG4_VAL 0xff #define PCIE_PCS_OSC_DTCT_CONFIG_MSK 0x000000ff From 99ad2b6815f41acbec15ab051ccc79b11b05710a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 22 Oct 2025 09:11:12 +0200 Subject: [PATCH 203/867] net: airoha: Remove code duplication in airoha_regs.h This patch does not introduce any logical change, it just removes duplicated code in airoha_regs.h. Fix naming conventions in airoha_regs.h. 
Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251022-airoha-regs-cosmetics-v2-1-e0425b3f2c2c@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 102 ++++++++++---------- drivers/net/ethernet/airoha/airoha_regs.h | 109 ++++++++++------------ 2 files changed, 100 insertions(+), 111 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 8483ea02603e2..e17a285a9e8fa 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -137,11 +137,11 @@ static void airoha_fe_maccr_init(struct airoha_eth *eth) for (p = 1; p <= ARRAY_SIZE(eth->ports); p++) airoha_fe_set(eth, REG_GDM_FWD_CFG(p), - GDM_TCP_CKSUM | GDM_UDP_CKSUM | GDM_IP4_CKSUM | - GDM_DROP_CRC_ERR); + GDM_TCP_CKSUM_MASK | GDM_UDP_CKSUM_MASK | + GDM_IP4_CKSUM_MASK | GDM_DROP_CRC_ERR_MASK); - airoha_fe_rmw(eth, REG_CDM1_VLAN_CTRL, CDM1_VLAN_MASK, - FIELD_PREP(CDM1_VLAN_MASK, 0x8100)); + airoha_fe_rmw(eth, REG_CDM_VLAN_CTRL(1), CDM_VLAN_MASK, + FIELD_PREP(CDM_VLAN_MASK, 0x8100)); airoha_fe_set(eth, REG_FE_CPORT_CFG, FE_CPORT_PAD); } @@ -403,46 +403,46 @@ static int airoha_fe_mc_vlan_clear(struct airoha_eth *eth) static void airoha_fe_crsn_qsel_init(struct airoha_eth *eth) { /* CDM1_CRSN_QSEL */ - airoha_fe_rmw(eth, REG_CDM1_CRSN_QSEL(CRSN_22 >> 2), - CDM1_CRSN_QSEL_REASON_MASK(CRSN_22), - FIELD_PREP(CDM1_CRSN_QSEL_REASON_MASK(CRSN_22), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(1, CRSN_22 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_22), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_22), CDM_CRSN_QSEL_Q1)); - airoha_fe_rmw(eth, REG_CDM1_CRSN_QSEL(CRSN_08 >> 2), - CDM1_CRSN_QSEL_REASON_MASK(CRSN_08), - FIELD_PREP(CDM1_CRSN_QSEL_REASON_MASK(CRSN_08), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(1, CRSN_08 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_08), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_08), CDM_CRSN_QSEL_Q1)); - airoha_fe_rmw(eth, 
REG_CDM1_CRSN_QSEL(CRSN_21 >> 2), - CDM1_CRSN_QSEL_REASON_MASK(CRSN_21), - FIELD_PREP(CDM1_CRSN_QSEL_REASON_MASK(CRSN_21), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(1, CRSN_21 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_21), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_21), CDM_CRSN_QSEL_Q1)); - airoha_fe_rmw(eth, REG_CDM1_CRSN_QSEL(CRSN_24 >> 2), - CDM1_CRSN_QSEL_REASON_MASK(CRSN_24), - FIELD_PREP(CDM1_CRSN_QSEL_REASON_MASK(CRSN_24), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(1, CRSN_24 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_24), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_24), CDM_CRSN_QSEL_Q6)); - airoha_fe_rmw(eth, REG_CDM1_CRSN_QSEL(CRSN_25 >> 2), - CDM1_CRSN_QSEL_REASON_MASK(CRSN_25), - FIELD_PREP(CDM1_CRSN_QSEL_REASON_MASK(CRSN_25), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(1, CRSN_25 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_25), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_25), CDM_CRSN_QSEL_Q1)); /* CDM2_CRSN_QSEL */ - airoha_fe_rmw(eth, REG_CDM2_CRSN_QSEL(CRSN_08 >> 2), - CDM2_CRSN_QSEL_REASON_MASK(CRSN_08), - FIELD_PREP(CDM2_CRSN_QSEL_REASON_MASK(CRSN_08), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(2, CRSN_08 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_08), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_08), CDM_CRSN_QSEL_Q1)); - airoha_fe_rmw(eth, REG_CDM2_CRSN_QSEL(CRSN_21 >> 2), - CDM2_CRSN_QSEL_REASON_MASK(CRSN_21), - FIELD_PREP(CDM2_CRSN_QSEL_REASON_MASK(CRSN_21), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(2, CRSN_21 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_21), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_21), CDM_CRSN_QSEL_Q1)); - airoha_fe_rmw(eth, REG_CDM2_CRSN_QSEL(CRSN_22 >> 2), - CDM2_CRSN_QSEL_REASON_MASK(CRSN_22), - FIELD_PREP(CDM2_CRSN_QSEL_REASON_MASK(CRSN_22), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(2, CRSN_22 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_22), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_22), CDM_CRSN_QSEL_Q1)); - airoha_fe_rmw(eth, REG_CDM2_CRSN_QSEL(CRSN_24 >> 2), - CDM2_CRSN_QSEL_REASON_MASK(CRSN_24), - FIELD_PREP(CDM2_CRSN_QSEL_REASON_MASK(CRSN_24), + 
airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(2, CRSN_24 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_24), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_24), CDM_CRSN_QSEL_Q6)); - airoha_fe_rmw(eth, REG_CDM2_CRSN_QSEL(CRSN_25 >> 2), - CDM2_CRSN_QSEL_REASON_MASK(CRSN_25), - FIELD_PREP(CDM2_CRSN_QSEL_REASON_MASK(CRSN_25), + airoha_fe_rmw(eth, REG_CDM_CRSN_QSEL(2, CRSN_25 >> 2), + CDM_CRSN_QSEL_REASON_MASK(CRSN_25), + FIELD_PREP(CDM_CRSN_QSEL_REASON_MASK(CRSN_25), CDM_CRSN_QSEL_Q1)); } @@ -462,18 +462,18 @@ static int airoha_fe_init(struct airoha_eth *eth) airoha_fe_wr(eth, REG_FE_PCE_CFG, PCE_DPI_EN_MASK | PCE_KA_EN_MASK | PCE_MC_EN_MASK); /* set vip queue selection to ring 1 */ - airoha_fe_rmw(eth, REG_CDM1_FWD_CFG, CDM1_VIP_QSEL_MASK, - FIELD_PREP(CDM1_VIP_QSEL_MASK, 0x4)); - airoha_fe_rmw(eth, REG_CDM2_FWD_CFG, CDM2_VIP_QSEL_MASK, - FIELD_PREP(CDM2_VIP_QSEL_MASK, 0x4)); + airoha_fe_rmw(eth, REG_CDM_FWD_CFG(1), CDM_VIP_QSEL_MASK, + FIELD_PREP(CDM_VIP_QSEL_MASK, 0x4)); + airoha_fe_rmw(eth, REG_CDM_FWD_CFG(2), CDM_VIP_QSEL_MASK, + FIELD_PREP(CDM_VIP_QSEL_MASK, 0x4)); /* set GDM4 source interface offset to 8 */ - airoha_fe_rmw(eth, REG_GDM4_SRC_PORT_SET, - GDM4_SPORT_OFF2_MASK | - GDM4_SPORT_OFF1_MASK | - GDM4_SPORT_OFF0_MASK, - FIELD_PREP(GDM4_SPORT_OFF2_MASK, 8) | - FIELD_PREP(GDM4_SPORT_OFF1_MASK, 8) | - FIELD_PREP(GDM4_SPORT_OFF0_MASK, 8)); + airoha_fe_rmw(eth, REG_GDM_SRC_PORT_SET(4), + GDM_SPORT_OFF2_MASK | + GDM_SPORT_OFF1_MASK | + GDM_SPORT_OFF0_MASK, + FIELD_PREP(GDM_SPORT_OFF2_MASK, 8) | + FIELD_PREP(GDM_SPORT_OFF1_MASK, 8) | + FIELD_PREP(GDM_SPORT_OFF0_MASK, 8)); /* set PSE Page as 128B */ airoha_fe_rmw(eth, REG_FE_DMA_GLO_CFG, @@ -499,8 +499,8 @@ static int airoha_fe_init(struct airoha_eth *eth) airoha_fe_set(eth, REG_GDM_MISC_CFG, GDM2_RDM_ACK_WAIT_PREF_MASK | GDM2_CHN_VLD_MODE_MASK); - airoha_fe_rmw(eth, REG_CDM2_FWD_CFG, CDM2_OAM_QSEL_MASK, - FIELD_PREP(CDM2_OAM_QSEL_MASK, 15)); + airoha_fe_rmw(eth, REG_CDM_FWD_CFG(2), CDM_OAM_QSEL_MASK, + FIELD_PREP(CDM_OAM_QSEL_MASK, 
15)); /* init fragment and assemble Force Port */ /* NPU Core-3, NPU Bridge Channel-3 */ @@ -514,8 +514,8 @@ static int airoha_fe_init(struct airoha_eth *eth) FIELD_PREP(IP_ASSEMBLE_PORT_MASK, 0) | FIELD_PREP(IP_ASSEMBLE_NBQ_MASK, 22)); - airoha_fe_set(eth, REG_GDM3_FWD_CFG, GDM3_PAD_EN_MASK); - airoha_fe_set(eth, REG_GDM4_FWD_CFG, GDM4_PAD_EN_MASK); + airoha_fe_set(eth, REG_GDM_FWD_CFG(3), GDM_PAD_EN_MASK); + airoha_fe_set(eth, REG_GDM_FWD_CFG(4), GDM_PAD_EN_MASK); airoha_fe_crsn_qsel_init(eth); @@ -523,7 +523,7 @@ static int airoha_fe_init(struct airoha_eth *eth) airoha_fe_set(eth, REG_FE_CPORT_CFG, FE_CPORT_PORT_XFC_MASK); /* default aging mode for mbi unlock issue */ - airoha_fe_rmw(eth, REG_GDM2_CHN_RLS, + airoha_fe_rmw(eth, REG_GDM_CHN_RLS(2), MBI_RX_AGE_SEL_MASK | MBI_TX_AGE_SEL_MASK, FIELD_PREP(MBI_RX_AGE_SEL_MASK, 3) | FIELD_PREP(MBI_TX_AGE_SEL_MASK, 3)); @@ -1692,7 +1692,7 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 : FE_PSE_PORT_GDM4; airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(2), pse_port); - airoha_fe_clear(eth, REG_GDM_FWD_CFG(2), GDM_STRIP_CRC); + airoha_fe_clear(eth, REG_GDM_FWD_CFG(2), GDM_STRIP_CRC_MASK); /* Enable GDM2 loopback */ airoha_fe_wr(eth, REG_GDM_TXCHN_EN(2), 0xffffffff); diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index ebcce00d9bc6f..ed4e3407f4a0e 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -23,6 +23,8 @@ #define GDM3_BASE 0x1100 #define GDM4_BASE 0x2500 +#define CDM_BASE(_n) \ + ((_n) == 2 ? CDM2_BASE : CDM1_BASE) #define GDM_BASE(_n) \ ((_n) == 4 ? GDM4_BASE : \ (_n) == 3 ? 
GDM3_BASE : \ @@ -109,30 +111,24 @@ #define PATN_DP_MASK GENMASK(31, 16) #define PATN_SP_MASK GENMASK(15, 0) -#define REG_CDM1_VLAN_CTRL CDM1_BASE -#define CDM1_VLAN_MASK GENMASK(31, 16) +#define REG_CDM_VLAN_CTRL(_n) CDM_BASE(_n) +#define CDM_VLAN_MASK GENMASK(31, 16) -#define REG_CDM1_FWD_CFG (CDM1_BASE + 0x08) -#define CDM1_VIP_QSEL_MASK GENMASK(24, 20) +#define REG_CDM_FWD_CFG(_n) (CDM_BASE(_n) + 0x08) +#define CDM_OAM_QSEL_MASK GENMASK(31, 27) +#define CDM_VIP_QSEL_MASK GENMASK(24, 20) -#define REG_CDM1_CRSN_QSEL(_n) (CDM1_BASE + 0x10 + ((_n) << 2)) -#define CDM1_CRSN_QSEL_REASON_MASK(_n) \ - GENMASK(4 + (((_n) % 4) << 3), (((_n) % 4) << 3)) - -#define REG_CDM2_FWD_CFG (CDM2_BASE + 0x08) -#define CDM2_OAM_QSEL_MASK GENMASK(31, 27) -#define CDM2_VIP_QSEL_MASK GENMASK(24, 20) - -#define REG_CDM2_CRSN_QSEL(_n) (CDM2_BASE + 0x10 + ((_n) << 2)) -#define CDM2_CRSN_QSEL_REASON_MASK(_n) \ +#define REG_CDM_CRSN_QSEL(_n, _m) (CDM_BASE(_n) + 0x10 + ((_m) << 2)) +#define CDM_CRSN_QSEL_REASON_MASK(_n) \ GENMASK(4 + (((_n) % 4) << 3), (((_n) % 4) << 3)) #define REG_GDM_FWD_CFG(_n) GDM_BASE(_n) -#define GDM_DROP_CRC_ERR BIT(23) -#define GDM_IP4_CKSUM BIT(22) -#define GDM_TCP_CKSUM BIT(21) -#define GDM_UDP_CKSUM BIT(20) -#define GDM_STRIP_CRC BIT(16) +#define GDM_PAD_EN_MASK BIT(28) +#define GDM_DROP_CRC_ERR_MASK BIT(23) +#define GDM_IP4_CKSUM_MASK BIT(22) +#define GDM_TCP_CKSUM_MASK BIT(21) +#define GDM_UDP_CKSUM_MASK BIT(20) +#define GDM_STRIP_CRC_MASK BIT(16) #define GDM_UCFQ_MASK GENMASK(15, 12) #define GDM_BCFQ_MASK GENMASK(11, 8) #define GDM_MCFQ_MASK GENMASK(7, 4) @@ -156,6 +152,10 @@ #define LBK_CHAN_MODE_MASK BIT(1) #define LPBK_EN_MASK BIT(0) +#define REG_GDM_CHN_RLS(_n) (GDM_BASE(_n) + 0x20) +#define MBI_RX_AGE_SEL_MASK GENMASK(26, 25) +#define MBI_TX_AGE_SEL_MASK GENMASK(18, 17) + #define REG_GDM_TXCHN_EN(_n) (GDM_BASE(_n) + 0x24) #define REG_GDM_RXCHN_EN(_n) (GDM_BASE(_n) + 0x28) @@ -168,10 +168,10 @@ #define FE_GDM_MIB_RX_CLEAR_MASK BIT(1) #define 
FE_GDM_MIB_TX_CLEAR_MASK BIT(0) -#define REG_FE_GDM1_MIB_CFG (GDM1_BASE + 0xf4) +#define REG_FE_GDM_MIB_CFG(_n) (GDM_BASE(_n) + 0xf4) #define FE_STRICT_RFC2819_MODE_MASK BIT(31) -#define FE_GDM1_TX_MIB_SPLIT_EN_MASK BIT(17) -#define FE_GDM1_RX_MIB_SPLIT_EN_MASK BIT(16) +#define FE_GDM_TX_MIB_SPLIT_EN_MASK BIT(17) +#define FE_GDM_RX_MIB_SPLIT_EN_MASK BIT(16) #define FE_TX_MIB_ID_MASK GENMASK(15, 8) #define FE_RX_MIB_ID_MASK GENMASK(7, 0) @@ -214,6 +214,33 @@ #define REG_FE_GDM_RX_ETH_L511_CNT_L(_n) (GDM_BASE(_n) + 0x198) #define REG_FE_GDM_RX_ETH_L1023_CNT_L(_n) (GDM_BASE(_n) + 0x19c) +#define REG_GDM_SRC_PORT_SET(_n) (GDM_BASE(_n) + 0x23c) +#define GDM_SPORT_OFF2_MASK GENMASK(19, 16) +#define GDM_SPORT_OFF1_MASK GENMASK(15, 12) +#define GDM_SPORT_OFF0_MASK GENMASK(11, 8) + +#define REG_FE_GDM_TX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x280) +#define REG_FE_GDM_TX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x284) +#define REG_FE_GDM_TX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x288) +#define REG_FE_GDM_TX_ETH_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x28c) + +#define REG_FE_GDM_RX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x290) +#define REG_FE_GDM_RX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x294) +#define REG_FE_GDM_RX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x298) +#define REG_FE_GDM_RX_ETH_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x29c) +#define REG_FE_GDM_TX_ETH_E64_CNT_H(_n) (GDM_BASE(_n) + 0x2b8) +#define REG_FE_GDM_TX_ETH_L64_CNT_H(_n) (GDM_BASE(_n) + 0x2bc) +#define REG_FE_GDM_TX_ETH_L127_CNT_H(_n) (GDM_BASE(_n) + 0x2c0) +#define REG_FE_GDM_TX_ETH_L255_CNT_H(_n) (GDM_BASE(_n) + 0x2c4) +#define REG_FE_GDM_TX_ETH_L511_CNT_H(_n) (GDM_BASE(_n) + 0x2c8) +#define REG_FE_GDM_TX_ETH_L1023_CNT_H(_n) (GDM_BASE(_n) + 0x2cc) +#define REG_FE_GDM_RX_ETH_E64_CNT_H(_n) (GDM_BASE(_n) + 0x2e8) +#define REG_FE_GDM_RX_ETH_L64_CNT_H(_n) (GDM_BASE(_n) + 0x2ec) +#define REG_FE_GDM_RX_ETH_L127_CNT_H(_n) (GDM_BASE(_n) + 0x2f0) +#define REG_FE_GDM_RX_ETH_L255_CNT_H(_n) (GDM_BASE(_n) + 0x2f4) +#define REG_FE_GDM_RX_ETH_L511_CNT_H(_n) 
(GDM_BASE(_n) + 0x2f8) +#define REG_FE_GDM_RX_ETH_L1023_CNT_H(_n) (GDM_BASE(_n) + 0x2fc) + #define REG_PPE_GLO_CFG(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x200) #define PPE_GLO_CFG_BUSY_MASK BIT(31) #define PPE_GLO_CFG_FLOW_DROP_UPDATE_MASK BIT(9) @@ -326,44 +353,6 @@ #define REG_UPDMEM_DATA(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x374) -#define REG_FE_GDM_TX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x280) -#define REG_FE_GDM_TX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x284) -#define REG_FE_GDM_TX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x288) -#define REG_FE_GDM_TX_ETH_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x28c) - -#define REG_FE_GDM_RX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x290) -#define REG_FE_GDM_RX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x294) -#define REG_FE_GDM_RX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x298) -#define REG_FE_GDM_RX_ETH_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x29c) -#define REG_FE_GDM_TX_ETH_E64_CNT_H(_n) (GDM_BASE(_n) + 0x2b8) -#define REG_FE_GDM_TX_ETH_L64_CNT_H(_n) (GDM_BASE(_n) + 0x2bc) -#define REG_FE_GDM_TX_ETH_L127_CNT_H(_n) (GDM_BASE(_n) + 0x2c0) -#define REG_FE_GDM_TX_ETH_L255_CNT_H(_n) (GDM_BASE(_n) + 0x2c4) -#define REG_FE_GDM_TX_ETH_L511_CNT_H(_n) (GDM_BASE(_n) + 0x2c8) -#define REG_FE_GDM_TX_ETH_L1023_CNT_H(_n) (GDM_BASE(_n) + 0x2cc) -#define REG_FE_GDM_RX_ETH_E64_CNT_H(_n) (GDM_BASE(_n) + 0x2e8) -#define REG_FE_GDM_RX_ETH_L64_CNT_H(_n) (GDM_BASE(_n) + 0x2ec) -#define REG_FE_GDM_RX_ETH_L127_CNT_H(_n) (GDM_BASE(_n) + 0x2f0) -#define REG_FE_GDM_RX_ETH_L255_CNT_H(_n) (GDM_BASE(_n) + 0x2f4) -#define REG_FE_GDM_RX_ETH_L511_CNT_H(_n) (GDM_BASE(_n) + 0x2f8) -#define REG_FE_GDM_RX_ETH_L1023_CNT_H(_n) (GDM_BASE(_n) + 0x2fc) - -#define REG_GDM2_CHN_RLS (GDM2_BASE + 0x20) -#define MBI_RX_AGE_SEL_MASK GENMASK(26, 25) -#define MBI_TX_AGE_SEL_MASK GENMASK(18, 17) - -#define REG_GDM3_FWD_CFG GDM3_BASE -#define GDM3_PAD_EN_MASK BIT(28) - -#define REG_GDM4_FWD_CFG GDM4_BASE -#define GDM4_PAD_EN_MASK BIT(28) -#define GDM4_SPORT_OFFSET0_MASK GENMASK(11, 8) - -#define REG_GDM4_SRC_PORT_SET 
(GDM4_BASE + 0x23c) -#define GDM4_SPORT_OFF2_MASK GENMASK(19, 16) -#define GDM4_SPORT_OFF1_MASK GENMASK(15, 12) -#define GDM4_SPORT_OFF0_MASK GENMASK(11, 8) - #define REG_IP_FRAG_FP 0x2010 #define IP_ASSEMBLE_PORT_MASK GENMASK(24, 21) #define IP_ASSEMBLE_NBQ_MASK GENMASK(20, 16) From ec538867a376c10161decc24318ab25a503622c7 Mon Sep 17 00:00:00 2001 From: Sunday Adelodun Date: Tue, 21 Oct 2025 20:59:06 +0100 Subject: [PATCH 204/867] net: unix: remove outdated BSD behavior comment in unix_release_sock() Remove the long-standing comment in unix_release_sock() that described a behavioral difference between Linux and BSD regarding when ECONNRESET is sent to connected UNIX sockets upon closure. As confirmed by testing on macOS (similar to BSD behavior), ECONNRESET is only observed for SOCK_DGRAM sockets, not for SOCK_STREAM. Meanwhile, Linux already returns ECONNRESET in cases where a socket is closed with unread data or is not yet accept()ed. This means the previous comment no longer accurately describes current behavior and is misleading. Suggested-by: Kuniyuki Iwashima Signed-off-by: Sunday Adelodun Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251021195906.20389-1-adelodunolaoluwa@yahoo.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 768098dec2310..54177caa9c12c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -733,17 +733,6 @@ static void unix_release_sock(struct sock *sk, int embrion) /* ---- Socket is dead now and most probably destroyed ---- */ - /* - * Fixme: BSD difference: In BSD all sockets connected to us get - * ECONNRESET and we die on the spot. In Linux we behave - * like files and pipes do and wait for the last - * dereference. - * - * Can't we simply set sock->err? - * - * What the above comment does talk about? 
--ANK(980817) - */ - if (READ_ONCE(unix_tot_inflight)) unix_gc(); /* Garbage collect fds */ } From 9ff86092655f7a22cc115188e3bf1b88ef49ec9e Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 22 Oct 2025 09:54:57 +0700 Subject: [PATCH 205/867] net: rmnet: Use section heading markup for packet format subsections Format subsections of "Packet format" section as reST subsections. Link: https://lore.kernel.org/linux-doc/aO_MefPIlQQrCU3j@horms.kernel.org/ Suggested-by: Simon Horman Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20251022025456.19004-2-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- .../device_drivers/cellular/qualcomm/rmnet.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst index 6877a32605820..5aedbabb73827 100644 --- a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst +++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -28,6 +28,7 @@ these MAP frames and send them to appropriate PDN's. ================ a. MAP packet v1 (data / control) +--------------------------------- MAP header fields are in big endian format. @@ -54,6 +55,7 @@ Payload length includes the padding length but does not include MAP header length. b. Map packet v4 (data / control) +--------------------------------- MAP header fields are in big endian format. @@ -107,6 +109,7 @@ over which checksum is computed. Checksum value, indicates the checksum computed. c. MAP packet v5 (data / control) +--------------------------------- MAP header fields are in big endian format. @@ -134,6 +137,7 @@ Payload length includes the padding length but does not include MAP header length. d. Checksum offload header v5 +----------------------------- Checksum offload header fields are in big endian format. 
@@ -158,7 +162,10 @@ indicates that the calculated packet checksum is invalid. Reserved bits must be zero when sent and ignored when received. -e. MAP packet v1/v5 (command specific):: +e. MAP packet v1/v5 (command specific) +-------------------------------------- + +Packet format:: Bit 0 1 2-7 8 - 15 16 - 31 Function Command Reserved Pad Multiplexer ID Payload length @@ -181,6 +188,7 @@ Command types = ========================================== f. Aggregation +-------------- Aggregation is multiple MAP packets (can be data or command) delivered to rmnet in a single linear skb. rmnet will process the individual From 05774d7e4201b673b415d65a1344dc396e7c00e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Oct 2025 15:12:09 -0700 Subject: [PATCH 206/867] tcp: Remove unnecessary null check in tcp_inbound_md5_hash() The 'if (!key && hash_location)' check in tcp_inbound_md5_hash() implies that hash_location might be null. However, later code in the function dereferences hash_location anyway, without checking for null first. Fortunately, there is no real bug, since tcp_inbound_md5_hash() is called only with non-null values of hash_location. Therefore, remove the unnecessary and misleading null check of hash_location. This silences a Smatch static checker warning (https://lore.kernel.org/netdev/aPi4b6aWBbBR52P1@stanley.mountain/) Also fix the related comment at the beginning of the function. 
Signed-off-by: Eric Biggers Reviewed-by: Kuniyuki Iwashima Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20251022221209.19716-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e15b38f6bd2d5..b79da6d393927 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4886,18 +4886,16 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, int family, int l3index, const __u8 *hash_location) { /* This gets called for each TCP segment that has TCP-MD5 option. - * We have 3 drop cases: - * o No MD5 hash and one expected. - * o MD5 hash and we're not expecting one. - * o MD5 hash and its wrong. + * We have 2 drop cases: + * o An MD5 signature is present, but we're not expecting one. + * o The MD5 signature is wrong. */ const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; u8 newhash[16]; key = tcp_md5_do_lookup(sk, l3index, saddr, family); - - if (!key && hash_location) { + if (!key) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; From 41b66240e90b0dceb488ce5a4a285a97e6ca63c7 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:16:30 +0100 Subject: [PATCH 207/867] net: dsa: lantiq_gswip: clarify GSWIP 2.2 VLAN mode in comment The comment above writing the default PVID incorrectly states that "GSWIP 2.2 (GRX300) and later program here the VID directly." The truth is that even GSWIP 2.2 and newer maintain the behavior of GSWIP 2.1 unless the VLANMD bit in PCE Global Control Register 1 is set ("GSWIP2.2 VLAN Mode"). Fix the misleading comment accordingly. 
Signed-off-by: Daniel Golle Acked-by; Hauke Mehrtens : Acked-by; Hauke Mehrtens : Link: https://patch.msgid.link/018056a575503d9797f3222f71a988e825316be0.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 25f6b46957a01..86b410a40d32c 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -588,7 +588,11 @@ static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr), GSWIP_PCE_VCTRL(port)); - /* GSWIP 2.2 (GRX300) and later program here the VID directly. */ + /* Note that in GSWIP 2.2 VLAN mode the VID needs to be programmed + * directly instead of referencing the index in the Active VLAN Tablet. + * However, without the VLANMD bit (9) in PCE_GCTRL_1 (0x457) even + * GSWIP 2.2 and newer hardware maintain the GSWIP 2.1 behavior. + */ gswip_switch_w(priv, idx, GSWIP_PCE_DEFPVID(port)); } From 70535979738937d8397e76f850bd00e97115d02d Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:16:37 +0100 Subject: [PATCH 208/867] net: dsa: lantiq_gswip: convert accessors to use regmap Use regmap for register access in preparation for supporting the MaxLinear GSW1xx family of switches connected via MDIO or SPI. Rewrite the existing accessor read-poll-timeout functions to use calls to the regmap API for now. 
Signed-off-by: Daniel Golle Reviewed-by: Maxime Chevallier Acked-by; Hauke Mehrtens : Acked-by; Hauke Mehrtens : Link: https://patch.msgid.link/535d968bc6319a74bdf76166ef19364ee659285f.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/Kconfig | 1 + drivers/net/dsa/lantiq/lantiq_gswip.c | 107 +++++++++++++++----------- drivers/net/dsa/lantiq/lantiq_gswip.h | 6 +- 3 files changed, 67 insertions(+), 47 deletions(-) diff --git a/drivers/net/dsa/lantiq/Kconfig b/drivers/net/dsa/lantiq/Kconfig index 1cb053c823f7f..3cfa16840cf59 100644 --- a/drivers/net/dsa/lantiq/Kconfig +++ b/drivers/net/dsa/lantiq/Kconfig @@ -2,6 +2,7 @@ config NET_DSA_LANTIQ_GSWIP tristate "Lantiq / Intel GSWIP" depends on HAS_IOMEM select NET_DSA_TAG_GSWIP + select REGMAP help This enables support for the Lantiq / Intel GSWIP 2.1 found in the xrx200 / VR9 SoC. diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 86b410a40d32c..8a448e1f5eef4 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -113,22 +113,22 @@ static const struct gswip_rmon_cnt_desc gswip_rmon_cnt[] = { static u32 gswip_switch_r(struct gswip_priv *priv, u32 offset) { - return __raw_readl(priv->gswip + (offset * 4)); + u32 val; + + regmap_read(priv->gswip, offset, &val); + + return val; } static void gswip_switch_w(struct gswip_priv *priv, u32 val, u32 offset) { - __raw_writel(val, priv->gswip + (offset * 4)); + regmap_write(priv->gswip, offset, val); } static void gswip_switch_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { - u32 val = gswip_switch_r(priv, offset); - - val &= ~(clear); - val |= set; - gswip_switch_w(priv, val, offset); + regmap_write_bits(priv->gswip, offset, clear | set, set); } static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, @@ -136,48 +136,34 @@ static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, { u32 val; - return 
readx_poll_timeout(__raw_readl, priv->gswip + (offset * 4), val, - (val & cleared) == 0, 20, 50000); + return regmap_read_poll_timeout(priv->gswip, offset, val, + !(val & cleared), 20, 50000); } static u32 gswip_mdio_r(struct gswip_priv *priv, u32 offset) { - return __raw_readl(priv->mdio + (offset * 4)); + u32 val; + + regmap_read(priv->mdio, offset, &val); + + return val; } static void gswip_mdio_w(struct gswip_priv *priv, u32 val, u32 offset) { - __raw_writel(val, priv->mdio + (offset * 4)); + regmap_write(priv->mdio, offset, val); } static void gswip_mdio_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { - u32 val = gswip_mdio_r(priv, offset); - - val &= ~(clear); - val |= set; - gswip_mdio_w(priv, val, offset); -} - -static u32 gswip_mii_r(struct gswip_priv *priv, u32 offset) -{ - return __raw_readl(priv->mii + (offset * 4)); -} - -static void gswip_mii_w(struct gswip_priv *priv, u32 val, u32 offset) -{ - __raw_writel(val, priv->mii + (offset * 4)); + regmap_write_bits(priv->mdio, offset, clear | set, set); } static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { - u32 val = gswip_mii_r(priv, offset); - - val &= ~(clear); - val |= set; - gswip_mii_w(priv, val, offset); + regmap_write_bits(priv->mii, offset, clear | set, set); } static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, @@ -220,17 +206,10 @@ static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, static int gswip_mdio_poll(struct gswip_priv *priv) { - int cnt = 100; - - while (likely(cnt--)) { - u32 ctrl = gswip_mdio_r(priv, GSWIP_MDIO_CTRL); - - if ((ctrl & GSWIP_MDIO_CTRL_BUSY) == 0) - return 0; - usleep_range(20, 40); - } + u32 ctrl; - return -ETIMEDOUT; + return regmap_read_poll_timeout(priv->mdio, GSWIP_MDIO_CTRL, ctrl, + !(ctrl & GSWIP_MDIO_CTRL_BUSY), 40, 4000); } static int gswip_mdio_wr(struct mii_bus *bus, int addr, int reg, u16 val) @@ -1893,9 +1872,37 @@ static int gswip_validate_cpu_port(struct 
dsa_switch *ds) return 0; } +static const struct regmap_config sw_regmap_config = { + .name = "switch", + .reg_bits = 32, + .val_bits = 32, + .reg_shift = REGMAP_UPSHIFT(2), + .val_format_endian = REGMAP_ENDIAN_NATIVE, + .max_register = GSWIP_SDMA_PCTRLp(6), +}; + +static const struct regmap_config mdio_regmap_config = { + .name = "mdio", + .reg_bits = 32, + .val_bits = 32, + .reg_shift = REGMAP_UPSHIFT(2), + .val_format_endian = REGMAP_ENDIAN_NATIVE, + .max_register = GSWIP_MDIO_PHYp(0), +}; + +static const struct regmap_config mii_regmap_config = { + .name = "mii", + .reg_bits = 32, + .val_bits = 32, + .reg_shift = REGMAP_UPSHIFT(2), + .val_format_endian = REGMAP_ENDIAN_NATIVE, + .max_register = GSWIP_MII_CFGp(6), +}; + static int gswip_probe(struct platform_device *pdev) { struct device_node *np, *gphy_fw_np; + __iomem void *gswip, *mdio, *mii; struct device *dev = &pdev->dev; struct gswip_priv *priv; int err; @@ -1906,15 +1913,27 @@ static int gswip_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->gswip = devm_platform_ioremap_resource(pdev, 0); + gswip = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(gswip)) + return PTR_ERR(gswip); + + mdio = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(mdio)) + return PTR_ERR(mdio); + + mii = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(mii)) + return PTR_ERR(mii); + + priv->gswip = devm_regmap_init_mmio(dev, gswip, &sw_regmap_config); if (IS_ERR(priv->gswip)) return PTR_ERR(priv->gswip); - priv->mdio = devm_platform_ioremap_resource(pdev, 1); + priv->mdio = devm_regmap_init_mmio(dev, mdio, &mdio_regmap_config); if (IS_ERR(priv->mdio)) return PTR_ERR(priv->mdio); - priv->mii = devm_platform_ioremap_resource(pdev, 2); + priv->mii = devm_regmap_init_mmio(dev, mii, &mii_regmap_config); if (IS_ERR(priv->mii)) return PTR_ERR(priv->mii); diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 69c8d2deff2d4..24d759e06e153 100644 --- 
a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -263,9 +263,9 @@ struct gswip_vlan { }; struct gswip_priv { - __iomem void *gswip; - __iomem void *mdio; - __iomem void *mii; + struct regmap *gswip; + struct regmap *mdio; + struct regmap *mii; const struct gswip_hw_info *hw_info; const struct xway_gphy_match_data *gphy_fw_name_cfg; struct dsa_switch *ds; From 128f5cf40fa5146267e2acfa29ad0bceaef763a3 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:16:50 +0100 Subject: [PATCH 209/867] net: dsa: lantiq_gswip: convert trivial accessor uses to regmap Use coccinelle semantic patch to convert all trivial uses of the register accessor functions to use the regmap API directly. // Replace gswip_switch_w with regmap_write @@ expression priv, val, offset; @@ - gswip_switch_w(priv, val, offset) + regmap_write(priv->gswip, offset, val) // Replace gswip_mdio_w with regmap_write @@ expression priv, val, offset; @@ - gswip_mdio_w(priv, val, offset) + regmap_write(priv->mdio, offset, val) // Replace gswip_switch_r in simple assignment - only for u32 @@ expression priv, offset; u32 var; @@ - var = gswip_switch_r(priv, offset) + regmap_read(priv->gswip, offset, &var) // Replace gswip_switch_mask with regmap_set_bits when clear is 0 @@ expression priv, set, offset; @@ - gswip_switch_mask(priv, 0, set, offset) + regmap_set_bits(priv->gswip, offset, set) // Replace gswip_mdio_mask with regmap_set_bits when clear is 0 @@ expression priv, set, offset; @@ - gswip_mdio_mask(priv, 0, set, offset) + regmap_set_bits(priv->mdio, offset, set) // Replace gswip_switch_mask with regmap_clear_bits when set is 0 @@ expression priv, clear, offset; @@ - gswip_switch_mask(priv, clear, 0, offset) + regmap_clear_bits(priv->gswip, offset, clear) // Replace gswip_mdio_mask with regmap_clear_bits when set is 0 @@ expression priv, clear, offset; @@ - gswip_mdio_mask(priv, clear, 0, offset) + regmap_clear_bits(priv->mdio, offset, clear) Remove 
gswip_switch_w() and gswip_mdio_w() functions as they now no longer have any users. Signed-off-by: Daniel Golle Acked-by: Hauke Mehrtens Link: https://patch.msgid.link/48a60f386b1bd487c410b1f5fb25ba50ceddc6f7.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 160 ++++++++++++-------------- 1 file changed, 76 insertions(+), 84 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 8a448e1f5eef4..e58320eaf9da8 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -120,11 +120,6 @@ static u32 gswip_switch_r(struct gswip_priv *priv, u32 offset) return val; } -static void gswip_switch_w(struct gswip_priv *priv, u32 val, u32 offset) -{ - regmap_write(priv->gswip, offset, val); -} - static void gswip_switch_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { @@ -149,11 +144,6 @@ static u32 gswip_mdio_r(struct gswip_priv *priv, u32 offset) return val; } -static void gswip_mdio_w(struct gswip_priv *priv, u32 val, u32 offset) -{ - regmap_write(priv->mdio, offset, val); -} - static void gswip_mdio_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { @@ -223,11 +213,11 @@ static int gswip_mdio_wr(struct mii_bus *bus, int addr, int reg, u16 val) return err; } - gswip_mdio_w(priv, val, GSWIP_MDIO_WRITE); - gswip_mdio_w(priv, GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_WR | - ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | - (reg & GSWIP_MDIO_CTRL_REGAD_MASK), - GSWIP_MDIO_CTRL); + regmap_write(priv->mdio, GSWIP_MDIO_WRITE, val); + regmap_write(priv->mdio, GSWIP_MDIO_CTRL, + GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_WR | + ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | + (reg & GSWIP_MDIO_CTRL_REGAD_MASK)); return 0; } @@ -243,10 +233,10 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) return err; } -
gswip_mdio_w(priv, GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_RD | - ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | - (reg & GSWIP_MDIO_CTRL_REGAD_MASK), - GSWIP_MDIO_CTRL); + regmap_write(priv->mdio, GSWIP_MDIO_CTRL, + GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_RD | + ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | + (reg & GSWIP_MDIO_CTRL_REGAD_MASK)); err = gswip_mdio_poll(priv); if (err) { @@ -310,7 +300,7 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv, return err; } - gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR); + regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | GSWIP_PCE_TBL_CTRL_OPMOD_MASK, tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS, @@ -360,24 +350,24 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, return err; } - gswip_switch_w(priv, tbl->index, GSWIP_PCE_TBL_ADDR); + regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | GSWIP_PCE_TBL_CTRL_OPMOD_MASK, tbl->table | addr_mode, GSWIP_PCE_TBL_CTRL); for (i = 0; i < ARRAY_SIZE(tbl->key); i++) - gswip_switch_w(priv, tbl->key[i], GSWIP_PCE_TBL_KEY(i)); + regmap_write(priv->gswip, GSWIP_PCE_TBL_KEY(i), tbl->key[i]); for (i = 0; i < ARRAY_SIZE(tbl->val); i++) - gswip_switch_w(priv, tbl->val[i], GSWIP_PCE_TBL_VAL(i)); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(i), tbl->val[i]); gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | GSWIP_PCE_TBL_CTRL_OPMOD_MASK, tbl->table | addr_mode, GSWIP_PCE_TBL_CTRL); - gswip_switch_w(priv, tbl->mask, GSWIP_PCE_TBL_MASK); + regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, tbl->mask); crtl = gswip_switch_r(priv, GSWIP_PCE_TBL_CTRL); crtl &= ~(GSWIP_PCE_TBL_CTRL_TYPE | GSWIP_PCE_TBL_CTRL_VLD | @@ -388,7 +378,7 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, crtl |= GSWIP_PCE_TBL_CTRL_VLD; crtl |= (tbl->gmap << 7) & GSWIP_PCE_TBL_CTRL_GMAP_MASK; crtl 
|= GSWIP_PCE_TBL_CTRL_BAS; - gswip_switch_w(priv, crtl, GSWIP_PCE_TBL_CTRL); + regmap_write(priv->gswip, GSWIP_PCE_TBL_CTRL, crtl); err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_BAS); @@ -467,14 +457,13 @@ static int gswip_port_enable(struct dsa_switch *ds, int port, } /* RMON Counter Enable for port */ - gswip_switch_w(priv, GSWIP_BM_PCFG_CNTEN, GSWIP_BM_PCFGp(port)); + regmap_write(priv->gswip, GSWIP_BM_PCFGp(port), GSWIP_BM_PCFG_CNTEN); /* enable port fetch/store dma & VLAN Modification */ - gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_EN | - GSWIP_FDMA_PCTRL_VLANMOD_BOTH, - GSWIP_FDMA_PCTRLp(port)); - gswip_switch_mask(priv, 0, GSWIP_SDMA_PCTRL_EN, - GSWIP_SDMA_PCTRLp(port)); + regmap_set_bits(priv->gswip, GSWIP_FDMA_PCTRLp(port), + GSWIP_FDMA_PCTRL_EN | GSWIP_FDMA_PCTRL_VLANMOD_BOTH); + regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); return 0; } @@ -483,10 +472,10 @@ static void gswip_port_disable(struct dsa_switch *ds, int port) { struct gswip_priv *priv = ds->priv; - gswip_switch_mask(priv, GSWIP_FDMA_PCTRL_EN, 0, - GSWIP_FDMA_PCTRLp(port)); - gswip_switch_mask(priv, GSWIP_SDMA_PCTRL_EN, 0, - GSWIP_SDMA_PCTRLp(port)); + regmap_clear_bits(priv->gswip, GSWIP_FDMA_PCTRLp(port), + GSWIP_FDMA_PCTRL_EN); + regmap_clear_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); } static int gswip_pce_load_microcode(struct gswip_priv *priv) @@ -497,22 +486,22 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv) gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | GSWIP_PCE_TBL_CTRL_OPMOD_MASK, GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, GSWIP_PCE_TBL_CTRL); - gswip_switch_w(priv, 0, GSWIP_PCE_TBL_MASK); + regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, 0); for (i = 0; i < priv->hw_info->pce_microcode_size; i++) { - gswip_switch_w(priv, i, GSWIP_PCE_TBL_ADDR); - gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_0, - GSWIP_PCE_TBL_VAL(0)); - gswip_switch_w(priv, 
(*priv->hw_info->pce_microcode)[i].val_1, - GSWIP_PCE_TBL_VAL(1)); - gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_2, - GSWIP_PCE_TBL_VAL(2)); - gswip_switch_w(priv, (*priv->hw_info->pce_microcode)[i].val_3, - GSWIP_PCE_TBL_VAL(3)); + regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, i); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(0), + (*priv->hw_info->pce_microcode)[i].val_0); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(1), + (*priv->hw_info->pce_microcode)[i].val_1); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(2), + (*priv->hw_info->pce_microcode)[i].val_2); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(3), + (*priv->hw_info->pce_microcode)[i].val_3); /* start the table access: */ - gswip_switch_mask(priv, 0, GSWIP_PCE_TBL_CTRL_BAS, - GSWIP_PCE_TBL_CTRL); + regmap_set_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_BAS); if (err) @@ -520,8 +509,8 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv) } /* tell the switch that the microcode is loaded */ - gswip_switch_mask(priv, 0, GSWIP_PCE_GCTRL_0_MC_VALID, - GSWIP_PCE_GCTRL_0); + regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_MC_VALID); return 0; } @@ -572,7 +561,7 @@ static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) * However, without the VLANMD bit (9) in PCE_GCTRL_1 (0x457) even * GSWIP 2.2 and newer hardware maintain the GSWIP 2.1 behavior. 
*/ - gswip_switch_w(priv, idx, GSWIP_PCE_DEFPVID(port)); + regmap_write(priv->gswip, GSWIP_PCE_DEFPVID(port), idx); } static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, @@ -588,8 +577,8 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, GSWIP_PCE_VCTRL_UVR | GSWIP_PCE_VCTRL_VIMR | GSWIP_PCE_VCTRL_VEMR | GSWIP_PCE_VCTRL_VID0, GSWIP_PCE_VCTRL(port)); - gswip_switch_mask(priv, GSWIP_PCE_PCTRL_0_TVM, 0, - GSWIP_PCE_PCTRL_0p(port)); + regmap_clear_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), + GSWIP_PCE_PCTRL_0_TVM); } else { /* Use port based VLAN */ gswip_switch_mask(priv, @@ -597,8 +586,8 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, GSWIP_PCE_VCTRL_VEMR | GSWIP_PCE_VCTRL_VID0, GSWIP_PCE_VCTRL_VSR, GSWIP_PCE_VCTRL(port)); - gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_TVM, - GSWIP_PCE_PCTRL_0p(port)); + regmap_set_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), + GSWIP_PCE_PCTRL_0_TVM); } gswip_port_commit_pvid(priv, port); @@ -613,9 +602,9 @@ static int gswip_setup(struct dsa_switch *ds) struct dsa_port *cpu_dp; int err, i; - gswip_switch_w(priv, GSWIP_SWRES_R0, GSWIP_SWRES); + regmap_write(priv->gswip, GSWIP_SWRES, GSWIP_SWRES_R0); usleep_range(5000, 10000); - gswip_switch_w(priv, 0, GSWIP_SWRES); + regmap_write(priv->gswip, GSWIP_SWRES, 0); /* disable port fetch/store dma on all ports */ for (i = 0; i < priv->hw_info->max_ports; i++) { @@ -624,7 +613,7 @@ static int gswip_setup(struct dsa_switch *ds) } /* enable Switch */ - gswip_mdio_mask(priv, 0, GSWIP_MDIO_GLOB_ENABLE, GSWIP_MDIO_GLOB); + regmap_set_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); err = gswip_pce_load_microcode(priv); if (err) { @@ -633,9 +622,9 @@ static int gswip_setup(struct dsa_switch *ds) } /* Default unknown Broadcast/Multicast/Unicast port maps */ - gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP1); - gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP2); - gswip_switch_w(priv, cpu_ports, GSWIP_PCE_PMAP3); + 
regmap_write(priv->gswip, GSWIP_PCE_PMAP1, cpu_ports); + regmap_write(priv->gswip, GSWIP_PCE_PMAP2, cpu_ports); + regmap_write(priv->gswip, GSWIP_PCE_PMAP3, cpu_ports); /* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an * interoperability problem with this auto polling mechanism because @@ -653,7 +642,7 @@ static int gswip_setup(struct dsa_switch *ds) * Testing shows that when PHY auto polling is disabled these problems * go away. */ - gswip_mdio_w(priv, 0x0, GSWIP_MDIO_MDC_CFG0); + regmap_write(priv->mdio, GSWIP_MDIO_MDC_CFG0, 0x0); /* Configure the MDIO Clock 2.5 MHz */ gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); @@ -673,22 +662,25 @@ static int gswip_setup(struct dsa_switch *ds) dsa_switch_for_each_cpu_port(cpu_dp, ds) { /* enable special tag insertion on cpu port */ - gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, - GSWIP_FDMA_PCTRLp(cpu_dp->index)); + regmap_set_bits(priv->gswip, GSWIP_FDMA_PCTRLp(cpu_dp->index), + GSWIP_FDMA_PCTRL_STEN); /* accept special tag in ingress direction */ - gswip_switch_mask(priv, 0, GSWIP_PCE_PCTRL_0_INGRESS, - GSWIP_PCE_PCTRL_0p(cpu_dp->index)); + regmap_set_bits(priv->gswip, + GSWIP_PCE_PCTRL_0p(cpu_dp->index), + GSWIP_PCE_PCTRL_0_INGRESS); } - gswip_switch_mask(priv, 0, GSWIP_BM_QUEUE_GCTRL_GL_MOD, - GSWIP_BM_QUEUE_GCTRL); + regmap_set_bits(priv->gswip, GSWIP_BM_QUEUE_GCTRL, + GSWIP_BM_QUEUE_GCTRL_GL_MOD); /* VLAN aware Switching */ - gswip_switch_mask(priv, 0, GSWIP_PCE_GCTRL_0_VLAN, GSWIP_PCE_GCTRL_0); + regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_VLAN); /* Flush MAC Table */ - gswip_switch_mask(priv, 0, GSWIP_PCE_GCTRL_0_MTFL, GSWIP_PCE_GCTRL_0); + regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_MTFL); err = gswip_switch_r_timeout(priv, GSWIP_PCE_GCTRL_0, GSWIP_PCE_GCTRL_0_MTFL); @@ -1074,8 +1066,8 @@ static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) switch (state) { case BR_STATE_DISABLED: - gswip_switch_mask(priv, 
GSWIP_SDMA_PCTRL_EN, 0, - GSWIP_SDMA_PCTRLp(port)); + regmap_clear_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); return; case BR_STATE_BLOCKING: case BR_STATE_LISTENING: @@ -1092,8 +1084,8 @@ static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) return; } - gswip_switch_mask(priv, 0, GSWIP_SDMA_PCTRL_EN, - GSWIP_SDMA_PCTRLp(port)); + regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); gswip_switch_mask(priv, GSWIP_PCE_PCTRL_0_PSTATE_MASK, stp_state, GSWIP_PCE_PCTRL_0p(port)); } @@ -1222,19 +1214,19 @@ static int gswip_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) */ if (dsa_is_cpu_port(ds, port)) { new_mtu += 8; - gswip_switch_w(priv, VLAN_ETH_HLEN + new_mtu + ETH_FCS_LEN, - GSWIP_MAC_FLEN); + regmap_write(priv->gswip, GSWIP_MAC_FLEN, + VLAN_ETH_HLEN + new_mtu + ETH_FCS_LEN); } /* Enable MLEN for ports with non-standard MTUs, including the special * header on the CPU port added above. */ if (new_mtu != ETH_DATA_LEN) - gswip_switch_mask(priv, 0, GSWIP_MAC_CTRL_2_MLEN, - GSWIP_MAC_CTRL_2p(port)); + regmap_set_bits(priv->gswip, GSWIP_MAC_CTRL_2p(port), + GSWIP_MAC_CTRL_2_MLEN); else - gswip_switch_mask(priv, GSWIP_MAC_CTRL_2_MLEN, 0, - GSWIP_MAC_CTRL_2p(port)); + regmap_clear_bits(priv->gswip, GSWIP_MAC_CTRL_2p(port), + GSWIP_MAC_CTRL_2_MLEN); return 0; } @@ -1536,7 +1528,7 @@ static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, u32 result; int err; - gswip_switch_w(priv, index, GSWIP_BM_RAM_ADDR); + regmap_write(priv->gswip, GSWIP_BM_RAM_ADDR, index); gswip_switch_mask(priv, GSWIP_BM_RAM_CTRL_ADDR_MASK | GSWIP_BM_RAM_CTRL_OPMOD, table | GSWIP_BM_RAM_CTRL_BAS, @@ -1550,7 +1542,7 @@ static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, return 0; } - result = gswip_switch_r(priv, GSWIP_BM_RAM_VAL(0)); + regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(0), &result); result |= gswip_switch_r(priv, GSWIP_BM_RAM_VAL(1)) << 16; return result; @@ -1952,7 
+1944,7 @@ static int gswip_probe(struct platform_device *pdev) priv->ds->phylink_mac_ops = &gswip_phylink_mac_ops; priv->dev = dev; mutex_init(&priv->pce_table_lock); - version = gswip_switch_r(priv, GSWIP_VERSION); + regmap_read(priv->gswip, GSWIP_VERSION, &version); /* The hardware has the 'major/minor' version bytes in the wrong order * preventing numerical comparisons. Construct a 16-bit unsigned integer @@ -2009,7 +2001,7 @@ static int gswip_probe(struct platform_device *pdev) return 0; disable_switch: - gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); + regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); dsa_unregister_switch(priv->ds); gphy_fw_remove: for (i = 0; i < priv->num_gphy_fw; i++) @@ -2026,7 +2018,7 @@ static void gswip_remove(struct platform_device *pdev) return; /* disable the switch */ - gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); + regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); dsa_unregister_switch(priv->ds); From 4cc06901ef34be259e8af850bfc3c3a17178486a Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:16:58 +0100 Subject: [PATCH 210/867] net: dsa: lantiq_gswip: manually convert remaining uses of read accessors Manually convert the remaining uses of the read accessor functions and remove them now that they are unused. 
Signed-off-by: Daniel Golle Acked-by: Hauke Mehrtens Link: https://patch.msgid.link/0e2a44b83131b40fc1ee558ed1f536c26e1232ba.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 78 +++++++++++++-------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index e58320eaf9da8..46fdc9d9c2c98 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -111,15 +111,6 @@ static const struct gswip_rmon_cnt_desc gswip_rmon_cnt[] = { MIB_DESC(2, 0x0E, "TxGoodBytes"), }; -static u32 gswip_switch_r(struct gswip_priv *priv, u32 offset) -{ - u32 val; - - regmap_read(priv->gswip, offset, &val); - - return val; -} - static void gswip_switch_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { @@ -135,15 +126,6 @@ static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, !(val & cleared), 20, 50000); } -static u32 gswip_mdio_r(struct gswip_priv *priv, u32 offset) -{ - u32 val; - - regmap_read(priv->mdio, offset, &val); - - return val; -} - static void gswip_mdio_mask(struct gswip_priv *priv, u32 clear, u32 set, u32 offset) { @@ -225,6 +207,7 @@ static int gswip_mdio_wr(struct mii_bus *bus, int addr, int reg, u16 val) static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) { struct gswip_priv *priv = bus->priv; + u32 val; int err; err = gswip_mdio_poll(priv); @@ -244,7 +227,11 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) return err; } - return gswip_mdio_r(priv, GSWIP_MDIO_READ); + err = regmap_read(priv->mdio, GSWIP_MDIO_READ, &val); + if (err) + return err; + + return val; } static int gswip_mdio(struct gswip_priv *priv) @@ -287,7 +274,8 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv, { int i; int err; - u16 crtl; + u32 crtl; + u32 tmp; u16 addr_mode = tbl->key_mode ?
GSWIP_PCE_TBL_CTRL_OPMOD_KSRD : GSWIP_PCE_TBL_CTRL_OPMOD_ADRD; @@ -295,10 +283,8 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv, err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_BAS); - if (err) { - mutex_unlock(&priv->pce_table_lock); - return err; - } + if (err) + goto out_unlock; regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | @@ -308,28 +294,39 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv, err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_BAS); - if (err) { - mutex_unlock(&priv->pce_table_lock); - return err; - } - - for (i = 0; i < ARRAY_SIZE(tbl->key); i++) - tbl->key[i] = gswip_switch_r(priv, GSWIP_PCE_TBL_KEY(i)); + if (err) + goto out_unlock; - for (i = 0; i < ARRAY_SIZE(tbl->val); i++) - tbl->val[i] = gswip_switch_r(priv, GSWIP_PCE_TBL_VAL(i)); + for (i = 0; i < ARRAY_SIZE(tbl->key); i++) { + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_KEY(i), &tmp); + if (err) + goto out_unlock; + tbl->key[i] = tmp; + } + for (i = 0; i < ARRAY_SIZE(tbl->val); i++) { + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_VAL(i), &tmp); + if (err) + goto out_unlock; + tbl->val[i] = tmp; + } - tbl->mask = gswip_switch_r(priv, GSWIP_PCE_TBL_MASK); + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_MASK, &tmp); + if (err) + goto out_unlock; - crtl = gswip_switch_r(priv, GSWIP_PCE_TBL_CTRL); + tbl->mask = tmp; + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_CTRL, &crtl); + if (err) + goto out_unlock; tbl->type = !!(crtl & GSWIP_PCE_TBL_CTRL_TYPE); tbl->valid = !!(crtl & GSWIP_PCE_TBL_CTRL_VLD); tbl->gmap = (crtl & GSWIP_PCE_TBL_CTRL_GMAP_MASK) >> 7; +out_unlock: mutex_unlock(&priv->pce_table_lock); - return 0; + return err; } static int gswip_pce_table_entry_write(struct gswip_priv *priv, @@ -337,7 +334,7 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, { int i; int err; - u16 crtl; + u32 crtl; u16 addr_mode = 
tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSWR : GSWIP_PCE_TBL_CTRL_OPMOD_ADWR; @@ -369,7 +366,7 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, tbl->mask); - crtl = gswip_switch_r(priv, GSWIP_PCE_TBL_CTRL); + regmap_read(priv->gswip, GSWIP_PCE_TBL_CTRL, &crtl); crtl &= ~(GSWIP_PCE_TBL_CTRL_TYPE | GSWIP_PCE_TBL_CTRL_VLD | GSWIP_PCE_TBL_CTRL_GMAP_MASK); if (tbl->type) @@ -1525,7 +1522,7 @@ static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, u32 index) { - u32 result; + u32 result, val; int err; regmap_write(priv->gswip, GSWIP_BM_RAM_ADDR, index); @@ -1543,7 +1540,8 @@ static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, } regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(0), &result); - result |= gswip_switch_r(priv, GSWIP_BM_RAM_VAL(1)) << 16; + regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(1), &val); + result |= val << 16; return result; } From 748b0aebd48f77f147418a6280a3792a487adcc0 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:17:06 +0100 Subject: [PATCH 211/867] net: dsa: lantiq_gswip: replace *_mask() functions with regmap API Use coccinelle to replace all uses of *_mask() with an equivalent call to regmap_write_bits(). // Replace gswip_switch_mask with regmap_write_bits @@ expression priv, clear, set, offset; @@ - gswip_switch_mask(priv, clear, set, offset) + regmap_write_bits(priv->gswip, offset, clear | set, set) // Replace gswip_mdio_mask with regmap_write_bits @@ expression priv, clear, set, offset; @@ - gswip_mdio_mask(priv, clear, set, offset) + regmap_write_bits(priv->mdio, offset, clear | set, set) // Replace gswip_mii_mask with regmap_write_bits @@ expression priv, clear, set, offset; @@ - gswip_mii_mask(priv, clear, set, offset) + regmap_write_bits(priv->mii, offset, clear | set, set) Remove the new unused *_mask() functions. 
This naive approach will be further optimized manually in the next commit. Signed-off-by: Daniel Golle Acked-by: Hauke Mehrtens Link: https://patch.msgid.link/258d931386a512b7089924c70073ca7acba71168.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 140 +++++++++++++------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 46fdc9d9c2c98..71dfddd62d9fb 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -111,12 +111,6 @@ static const struct gswip_rmon_cnt_desc gswip_rmon_cnt[] = { MIB_DESC(2, 0x0E, "TxGoodBytes"), }; -static void gswip_switch_mask(struct gswip_priv *priv, u32 clear, u32 set, - u32 offset) -{ - regmap_write_bits(priv->gswip, offset, clear | set, set); -} - static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, u32 cleared) { @@ -126,18 +120,6 @@ static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, !(val & cleared), 20, 50000); } -static void gswip_mdio_mask(struct gswip_priv *priv, u32 clear, u32 set, - u32 offset) -{ - regmap_write_bits(priv->mdio, offset, clear | set, set); -} - -static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set, - u32 offset) -{ - regmap_write_bits(priv->mii, offset, clear | set, set); -} - static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, int port) { @@ -149,7 +131,8 @@ static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, reg_port = port + priv->hw_info->mii_port_reg_offset; - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(reg_port)); + regmap_write_bits(priv->mii, GSWIP_MII_CFGp(reg_port), clear | set, + set); } static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, @@ -165,13 +148,16 @@ static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, switch
(reg_port) { case 0: - gswip_mii_mask(priv, clear, set, GSWIP_MII_PCDU0); + regmap_write_bits(priv->mii, GSWIP_MII_PCDU0, clear | set, + set); break; case 1: - gswip_mii_mask(priv, clear, set, GSWIP_MII_PCDU1); + regmap_write_bits(priv->mii, GSWIP_MII_PCDU1, clear | set, + set); break; case 5: - gswip_mii_mask(priv, clear, set, GSWIP_MII_PCDU5); + regmap_write_bits(priv->mii, GSWIP_MII_PCDU5, clear | set, + set); break; } } @@ -287,10 +273,11 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv, goto out_unlock; regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); - gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK, + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK | tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS, - GSWIP_PCE_TBL_CTRL); + tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS); err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_BAS); @@ -348,10 +335,11 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, } regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); - gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK, + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK | tbl->table | addr_mode, - GSWIP_PCE_TBL_CTRL); + tbl->table | addr_mode); for (i = 0; i < ARRAY_SIZE(tbl->key); i++) regmap_write(priv->gswip, GSWIP_PCE_TBL_KEY(i), tbl->key[i]); @@ -359,10 +347,11 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, for (i = 0; i < ARRAY_SIZE(tbl->val); i++) regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(i), tbl->val[i]); - gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK, + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK | tbl->table | addr_mode, - GSWIP_PCE_TBL_CTRL); + tbl->table 
| addr_mode); regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, tbl->mask); @@ -449,8 +438,9 @@ static int gswip_port_enable(struct dsa_switch *ds, int port, if (phydev) mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK; - gswip_mdio_mask(priv, GSWIP_MDIO_PHY_ADDR_MASK, mdio_phy, - GSWIP_MDIO_PHYp(port)); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_ADDR_MASK | mdio_phy, + mdio_phy); } /* RMON Counter Enable for port */ @@ -480,9 +470,11 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv) int i; int err; - gswip_switch_mask(priv, GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK, - GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, GSWIP_PCE_TBL_CTRL); + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, + GSWIP_PCE_TBL_CTRL_OPMOD_ADWR); regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, 0); for (i = 0; i < priv->hw_info->pce_microcode_size; i++) { @@ -549,9 +541,10 @@ static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) } vinr = idx ? GSWIP_PCE_VCTRL_VINR_ALL : GSWIP_PCE_VCTRL_VINR_TAGGED; - gswip_switch_mask(priv, GSWIP_PCE_VCTRL_VINR, + regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), + GSWIP_PCE_VCTRL_VINR | FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr), - GSWIP_PCE_VCTRL(port)); + FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr)); /* Note that in GSWIP 2.2 VLAN mode the VID needs to be programmed * directly instead of referencing the index in the Active VLAN Tablet. 
@@ -569,20 +562,27 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, if (vlan_filtering) { /* Use tag based VLAN */ - gswip_switch_mask(priv, - GSWIP_PCE_VCTRL_VSR, - GSWIP_PCE_VCTRL_UVR | GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR | GSWIP_PCE_VCTRL_VID0, - GSWIP_PCE_VCTRL(port)); + regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), + GSWIP_PCE_VCTRL_VSR | + GSWIP_PCE_VCTRL_UVR | + GSWIP_PCE_VCTRL_VIMR | + GSWIP_PCE_VCTRL_VEMR | + GSWIP_PCE_VCTRL_VID0, + GSWIP_PCE_VCTRL_UVR | + GSWIP_PCE_VCTRL_VIMR | + GSWIP_PCE_VCTRL_VEMR | + GSWIP_PCE_VCTRL_VID0); regmap_clear_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), GSWIP_PCE_PCTRL_0_TVM); } else { /* Use port based VLAN */ - gswip_switch_mask(priv, - GSWIP_PCE_VCTRL_UVR | GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR | GSWIP_PCE_VCTRL_VID0, + regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), + GSWIP_PCE_VCTRL_UVR | + GSWIP_PCE_VCTRL_VIMR | + GSWIP_PCE_VCTRL_VEMR | + GSWIP_PCE_VCTRL_VID0 | GSWIP_PCE_VCTRL_VSR, - GSWIP_PCE_VCTRL(port)); + GSWIP_PCE_VCTRL_VSR); regmap_set_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), GSWIP_PCE_PCTRL_0_TVM); } @@ -642,7 +642,7 @@ static int gswip_setup(struct dsa_switch *ds) regmap_write(priv->mdio, GSWIP_MDIO_MDC_CFG0, 0x0); /* Configure the MDIO Clock 2.5 MHz */ - gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); + regmap_write_bits(priv->mdio, GSWIP_MDIO_MDC_CFG1, 0xff | 0x09, 0x09); /* bring up the mdio bus */ err = gswip_mdio(priv); @@ -1083,8 +1083,9 @@ static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), GSWIP_SDMA_PCTRL_EN); - gswip_switch_mask(priv, GSWIP_PCE_PCTRL_0_PSTATE_MASK, stp_state, - GSWIP_PCE_PCTRL_0p(port)); + regmap_write_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), + GSWIP_PCE_PCTRL_0_PSTATE_MASK | stp_state, + stp_state); } static int gswip_port_fdb(struct dsa_switch *ds, int port, @@ -1313,8 +1314,8 @@ static void gswip_port_set_link(struct gswip_priv 
*priv, int port, bool link) else mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN; - gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_MASK, mdio_phy, - GSWIP_MDIO_PHYp(port)); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_LINK_MASK | mdio_phy, mdio_phy); } static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, @@ -1354,11 +1355,11 @@ static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, break; } - gswip_mdio_mask(priv, GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy, - GSWIP_MDIO_PHYp(port)); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_SPEED_MASK | mdio_phy, mdio_phy); gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port); - gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0, - GSWIP_MAC_CTRL_0p(port)); + regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), + GSWIP_MAC_CTRL_0_GMII_MASK | mac_ctrl_0, mac_ctrl_0); } static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) @@ -1373,10 +1374,10 @@ static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS; } - gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0, - GSWIP_MAC_CTRL_0p(port)); - gswip_mdio_mask(priv, GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy, - GSWIP_MDIO_PHYp(port)); + regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), + GSWIP_MAC_CTRL_0_FDUP_MASK | mac_ctrl_0, mac_ctrl_0); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_FDUP_MASK | mdio_phy, mdio_phy); } static void gswip_port_set_pause(struct gswip_priv *priv, int port, @@ -1402,12 +1403,11 @@ static void gswip_port_set_pause(struct gswip_priv *priv, int port, GSWIP_MDIO_PHY_FCONRX_DIS; } - gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FCON_MASK, - mac_ctrl_0, GSWIP_MAC_CTRL_0p(port)); - gswip_mdio_mask(priv, - GSWIP_MDIO_PHY_FCONTX_MASK | - GSWIP_MDIO_PHY_FCONRX_MASK, - mdio_phy, GSWIP_MDIO_PHYp(port)); + regmap_write_bits(priv->gswip, 
GSWIP_MAC_CTRL_0p(port), + GSWIP_MAC_CTRL_0_FCON_MASK | mac_ctrl_0, mac_ctrl_0); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_FCONTX_MASK | GSWIP_MDIO_PHY_FCONRX_MASK | mdio_phy, + mdio_phy); } static void gswip_phylink_mac_config(struct phylink_config *config, @@ -1526,10 +1526,10 @@ static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, int err; regmap_write(priv->gswip, GSWIP_BM_RAM_ADDR, index); - gswip_switch_mask(priv, GSWIP_BM_RAM_CTRL_ADDR_MASK | - GSWIP_BM_RAM_CTRL_OPMOD, - table | GSWIP_BM_RAM_CTRL_BAS, - GSWIP_BM_RAM_CTRL); + regmap_write_bits(priv->gswip, GSWIP_BM_RAM_CTRL, + GSWIP_BM_RAM_CTRL_ADDR_MASK | GSWIP_BM_RAM_CTRL_OPMOD | + table | GSWIP_BM_RAM_CTRL_BAS, + table | GSWIP_BM_RAM_CTRL_BAS); err = gswip_switch_r_timeout(priv, GSWIP_BM_RAM_CTRL, GSWIP_BM_RAM_CTRL_BAS); From 1d88358303fc81896b92e50eb732b726e7b88ee6 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:17:14 +0100 Subject: [PATCH 212/867] net: dsa: lantiq_gswip: optimize regmap_write_bits() statements Further optimize the previous naive conversion of the *_mask() accessor functions to regmap_write_bits by manually removing redundant mask operands. 
Signed-off-by: Daniel Golle Acked-by: Hauke Mehrtens Link: https://patch.msgid.link/fce2f964b22fe3efc234c664b1e50de28dddf512.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 33 ++++++++++++--------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 71dfddd62d9fb..2483235241666 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -276,7 +276,7 @@ static int gswip_pce_table_entry_read(struct gswip_priv *priv, regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_ADDR_MASK | GSWIP_PCE_TBL_CTRL_OPMOD_MASK | - tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS, + GSWIP_PCE_TBL_CTRL_BAS, tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS); err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, @@ -337,8 +337,7 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK | - tbl->table | addr_mode, + GSWIP_PCE_TBL_CTRL_OPMOD_MASK, tbl->table | addr_mode); for (i = 0; i < ARRAY_SIZE(tbl->key); i++) @@ -349,8 +348,7 @@ static int gswip_pce_table_entry_write(struct gswip_priv *priv, regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK | - tbl->table | addr_mode, + GSWIP_PCE_TBL_CTRL_OPMOD_MASK, tbl->table | addr_mode); regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, tbl->mask); @@ -439,7 +437,7 @@ static int gswip_port_enable(struct dsa_switch *ds, int port, mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK; regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_ADDR_MASK | mdio_phy, + GSWIP_MDIO_PHY_ADDR_MASK, mdio_phy); } @@ -542,8 +540,7 @@ static void 
gswip_port_commit_pvid(struct gswip_priv *priv, int port) vinr = idx ? GSWIP_PCE_VCTRL_VINR_ALL : GSWIP_PCE_VCTRL_VINR_TAGGED; regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), - GSWIP_PCE_VCTRL_VINR | - FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr), + GSWIP_PCE_VCTRL_VINR, FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr)); /* Note that in GSWIP 2.2 VLAN mode the VID needs to be programmed @@ -642,7 +639,7 @@ static int gswip_setup(struct dsa_switch *ds) regmap_write(priv->mdio, GSWIP_MDIO_MDC_CFG0, 0x0); /* Configure the MDIO Clock 2.5 MHz */ - regmap_write_bits(priv->mdio, GSWIP_MDIO_MDC_CFG1, 0xff | 0x09, 0x09); + regmap_write_bits(priv->mdio, GSWIP_MDIO_MDC_CFG1, 0xff, 0x09); /* bring up the mdio bus */ err = gswip_mdio(priv); @@ -1084,7 +1081,7 @@ static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), GSWIP_SDMA_PCTRL_EN); regmap_write_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), - GSWIP_PCE_PCTRL_0_PSTATE_MASK | stp_state, + GSWIP_PCE_PCTRL_0_PSTATE_MASK, stp_state); } @@ -1315,7 +1312,7 @@ static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link) mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN; regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_LINK_MASK | mdio_phy, mdio_phy); + GSWIP_MDIO_PHY_LINK_MASK, mdio_phy); } static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, @@ -1356,10 +1353,10 @@ static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, } regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_SPEED_MASK | mdio_phy, mdio_phy); + GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy); gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port); regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), - GSWIP_MAC_CTRL_0_GMII_MASK | mac_ctrl_0, mac_ctrl_0); + GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0); } static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) @@ -1375,9 +1372,9 @@ static 
void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) } regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), - GSWIP_MAC_CTRL_0_FDUP_MASK | mac_ctrl_0, mac_ctrl_0); + GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0); regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_FDUP_MASK | mdio_phy, mdio_phy); + GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy); } static void gswip_port_set_pause(struct gswip_priv *priv, int port, @@ -1404,9 +1401,9 @@ static void gswip_port_set_pause(struct gswip_priv *priv, int port, } regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), - GSWIP_MAC_CTRL_0_FCON_MASK | mac_ctrl_0, mac_ctrl_0); + GSWIP_MAC_CTRL_0_FCON_MASK, mac_ctrl_0); regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_FCONTX_MASK | GSWIP_MDIO_PHY_FCONRX_MASK | mdio_phy, + GSWIP_MDIO_PHY_FCONTX_MASK | GSWIP_MDIO_PHY_FCONRX_MASK, mdio_phy); } @@ -1528,7 +1525,7 @@ static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, regmap_write(priv->gswip, GSWIP_BM_RAM_ADDR, index); regmap_write_bits(priv->gswip, GSWIP_BM_RAM_CTRL, GSWIP_BM_RAM_CTRL_ADDR_MASK | GSWIP_BM_RAM_CTRL_OPMOD | - table | GSWIP_BM_RAM_CTRL_BAS, + GSWIP_BM_RAM_CTRL_BAS, table | GSWIP_BM_RAM_CTRL_BAS); err = gswip_switch_r_timeout(priv, GSWIP_BM_RAM_CTRL, From b0911b9e014004d3e617e8bba27000f521a26422 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 21 Oct 2025 12:17:33 +0100 Subject: [PATCH 213/867] net: dsa: lantiq_gswip: harmonize gswip_mii_mask_*() parameters The 'clear' parameter of gswip_mii_mask_cfg() and gswip_mii_mask_pcdu() is inconsistent with the semantics of regmap_write_bits() which also applies the mask to the value to be written. Change the semantic mask/set of the functions gswip_mii_mask_cfg() and gswip_mii_mask_pcdu() to follow the regmap_write_bits() pattern. 
Signed-off-by: Daniel Golle Acked-by: Hauke Mehrtens Link: https://patch.msgid.link/218854236c97a152af071852bda83d02ff2dd918.1761045000.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 2483235241666..38f7f6352e8d0 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -120,7 +120,7 @@ static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, !(val & cleared), 20, 50000); } -static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, +static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 mask, u32 set, int port) { int reg_port; @@ -131,11 +131,11 @@ static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, reg_port = port + priv->hw_info->mii_port_reg_offset; - regmap_write_bits(priv->mii, GSWIP_MII_CFGp(reg_port), clear | set, + regmap_write_bits(priv->mii, GSWIP_MII_CFGp(reg_port), mask, set); } -static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, +static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 mask, u32 set, int port) { int reg_port; @@ -148,16 +148,13 @@ static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, switch (reg_port) { case 0: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU0, clear | set, - set); + regmap_write_bits(priv->mii, GSWIP_MII_PCDU0, mask, set); break; case 1: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU1, clear | set, - set); + regmap_write_bits(priv->mii, GSWIP_MII_PCDU1, mask, set); break; case 5: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU5, clear | set, - set); + regmap_write_bits(priv->mii, GSWIP_MII_PCDU5, mask, set); break; } } @@ -1501,7 +1498,7 @@ static void gswip_phylink_mac_link_up(struct phylink_config *config, 
gswip_port_set_pause(priv, port, tx_pause, rx_pause); } - gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, GSWIP_MII_CFG_EN, port); } static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, From 7df699c2132f36359f8f79e6a163c3b3fe0b0e3d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 10 Sep 2025 12:47:02 +0200 Subject: [PATCH 214/867] media: v4l2-subdev / pdx86: int3472: Use "privacy" as con_id for the privacy LED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During DT-binding review for extending the V4L2 camera sensor privacy LED support to systems using devicetree, it has come up that having a "-led" suffix for the LED name / con_id is undesirable since it already is clear that it is a LED. Drop the "-led" suffix from the con_id in both the lookup table in the int3472 code, as well as from the con_id led_get() argument in the v4l2-subdev code. Signed-off-by: Hans de Goede Acked-by: Ilpo Järvinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 2 +- drivers/platform/x86/intel/int3472/led.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 1da953629010b..25e66bf18f5fc 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -2608,7 +2608,7 @@ EXPORT_SYMBOL_GPL(v4l2_subdev_is_streaming); int v4l2_subdev_get_privacy_led(struct v4l2_subdev *sd) { #if IS_REACHABLE(CONFIG_LEDS_CLASS) - sd->privacy_led = led_get(sd->dev, "privacy-led"); + sd->privacy_led = led_get(sd->dev, "privacy"); if (IS_ERR(sd->privacy_led) && PTR_ERR(sd->privacy_led) != -ENOENT) return dev_err_probe(sd->dev, PTR_ERR(sd->privacy_led), "getting privacy LED\n"); diff --git a/drivers/platform/x86/intel/int3472/led.c b/drivers/platform/x86/intel/int3472/led.c index 
f1d6d7b0cb75a..b1d84b9681124 100644 --- a/drivers/platform/x86/intel/int3472/led.c +++ b/drivers/platform/x86/intel/int3472/led.c @@ -43,7 +43,7 @@ int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gp int3472->pled.lookup.provider = int3472->pled.name; int3472->pled.lookup.dev_id = int3472->sensor_name; - int3472->pled.lookup.con_id = "privacy-led"; + int3472->pled.lookup.con_id = "privacy"; led_add_lookup(&int3472->pled.lookup); return 0; From 758dbc756aad429da11c569c0d067f7fd032bcf7 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 21 Oct 2025 10:36:17 +0000 Subject: [PATCH 215/867] media: uvcvideo: Use heuristic to find stream entity Some devices, like the Grandstream GUV3100 webcam, have an invalid UVC descriptor where multiple entities share the same ID; this is invalid and makes it impossible to build a proper entity tree without heuristics. We have recently introduced a change in the way that we handle invalid entities that has caused a regression on broken devices. Implement a new heuristic to handle these devices properly. 
Reported-by: Angel4005 Closes: https://lore.kernel.org/linux-media/CAOzBiVuS7ygUjjhCbyWg-KiNx+HFTYnqH5+GJhd6cYsNLT=DaA@mail.gmail.com/ Fixes: 0e2ee70291e6 ("media: uvcvideo: Mark invalid entities with id UVC_INVALID_ENTITY_ID") Cc: stable@vger.kernel.org Signed-off-by: Ricardo Ribalda Reviewed-by: Hans de Goede Signed-off-by: Hans Verkuil --- drivers/media/usb/uvc/uvc_driver.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index fb6afb8e84f00..ee4f54d683496 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -167,13 +167,26 @@ static struct uvc_entity *uvc_entity_by_reference(struct uvc_device *dev, static struct uvc_streaming *uvc_stream_by_id(struct uvc_device *dev, int id) { - struct uvc_streaming *stream; + struct uvc_streaming *stream, *last_stream; + unsigned int count = 0; list_for_each_entry(stream, &dev->streams, list) { + count += 1; + last_stream = stream; if (stream->header.bTerminalLink == id) return stream; } + /* + * If the streaming entity is referenced by an invalid ID, notify the + * user and use heuristics to guess the correct entity. + */ + if (count == 1 && id == UVC_INVALID_ENTITY_ID) { + dev_warn(&dev->intf->dev, + "UVC non compliance: Invalid USB header. The streaming entity has an invalid ID, guessing the correct one."); + return last_stream; + } + return NULL; } From 27afd6e066cfd80ddbe22a4a11b99174ac89cced Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Oct 2025 16:26:34 +0200 Subject: [PATCH 216/867] media: videobuf2: forbid remove_bufs when legacy fileio is active vb2_ioctl_remove_bufs() call manipulates queue internal buffer list, potentially overwriting some pointers used by the legacy fileio access mode. Forbid that ioctl when fileio is active to protect internal queue state between subsequent read/write calls. 
CC: stable@vger.kernel.org Fixes: a3293a85381e ("media: v4l2: Add REMOVE_BUFS ioctl") Reported-by: Shuangpeng Bai Closes: https://lore.kernel.org/linux-media/5317B590-AAB4-4F17-8EA1-621965886D49@psu.edu/ Signed-off-by: Marek Szyprowski Signed-off-by: Hans Verkuil --- drivers/media/common/videobuf2/videobuf2-v4l2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index d911021c1bb05..83862d57b126d 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -1010,6 +1010,11 @@ int vb2_ioctl_remove_bufs(struct file *file, void *priv, if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; + if (vb2_fileio_is_active(vdev->queue)) { + dprintk(vdev->queue, 1, "file io in progress\n"); + return -EBUSY; + } + return vb2_core_remove_bufs(vdev->queue, d->index, d->count); } EXPORT_SYMBOL_GPL(vb2_ioctl_remove_bufs); From f7746cfcdbc5d3af9c5059deed30e277d926098b Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Thu, 16 Oct 2025 23:01:00 -0700 Subject: [PATCH 217/867] wifi: ath12k: add support for BSS color change Add support for handling BSS color collision events reported by firmware. There are two scenarios where a BSS color collision may be detected: 1. The AP's MAC detects the collision directly, and firmware reports a BSS color collision event to the host. 2. A STA associated with the AP detects the collision. The notification frame from the peer is routed directly to the AP firmware, which handles it and sends the BSS color collision event to the host. Add logic to parse and handle such events, and pass the data up to mac80211. Unlike CSA, firmware does not provide an offload mechanism for BSS color change. Instead, the color change process is triggered via beacon offload TX completion events sent by firmware. 
BSS color feature is enabled depending on service flag advertised by firmware, based on which color change functionality is invoked. This change builds upon the following ath11k patch. commit 886433a98425 ("ath11k: add support for BSS color change") Tested-on: WCN7850 hw2.0 PCI WLAN.IOE_HMT.1.1-00011-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Signed-off-by: Wei Zhang Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251017060100.1751692-1-wei.zhang@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/mac.c | 58 +++++++++++++++++++- drivers/net/wireless/ath/ath12k/wmi.c | 73 +++++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/wmi.h | 24 +++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 02a32b9f3ac29..41da0efaa8547 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -356,6 +356,7 @@ struct ath12k_link_vif { bool pairwise_key_done; u16 num_stations; bool is_csa_in_progress; + struct wiphy_work bcn_tx_work; }; struct ath12k_vif { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index d7bc19cea2a64..e79d457e3c03d 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -3834,6 +3834,38 @@ static void ath12k_recalculate_mgmt_rate(struct ath12k *ar, ath12k_warn(ar->ab, "failed to set beacon tx rate %d\n", ret); } +static void ath12k_mac_bcn_tx_event(struct ath12k_link_vif *arvif) +{ + struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ieee80211_bss_conf *link_conf; + + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(arvif->ar->ab, "failed to get link conf for vdev %u\n", + arvif->vdev_id); + return; + } + + if (link_conf->color_change_active) { + if 
(ieee80211_beacon_cntdwn_is_complete(vif, arvif->link_id)) { + ieee80211_color_change_finish(vif, arvif->link_id); + return; + } + + ieee80211_beacon_update_cntdwn(vif, arvif->link_id); + ath12k_mac_setup_bcn_tmpl(arvif); + } +} + +static void ath12k_mac_bcn_tx_work(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct ath12k_link_vif *arvif = container_of(work, struct ath12k_link_vif, + bcn_tx_work); + + lockdep_assert_wiphy(wiphy); + ath12k_mac_bcn_tx_event(arvif); +} + static void ath12k_mac_init_arvif(struct ath12k_vif *ahvif, struct ath12k_link_vif *arvif, int link_id) { @@ -3863,6 +3895,7 @@ static void ath12k_mac_init_arvif(struct ath12k_vif *ahvif, INIT_LIST_HEAD(&arvif->list); INIT_DELAYED_WORK(&arvif->connection_loss_work, ath12k_mac_vif_sta_connection_loss_work); + wiphy_work_init(&arvif->bcn_tx_work, ath12k_mac_bcn_tx_work); arvif->num_stations = 0; @@ -3900,6 +3933,7 @@ static void ath12k_mac_remove_link_interface(struct ieee80211_hw *hw, lockdep_assert_wiphy(ah->hw->wiphy); cancel_delayed_work_sync(&arvif->connection_loss_work); + wiphy_work_cancel(ath12k_ar_to_hw(ar)->wiphy, &arvif->bcn_tx_work); ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac remove link interface (vdev %d link id %d)", arvif->vdev_id, arvif->link_id); @@ -4547,8 +4581,25 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, ATH12K_BSS_COLOR_AP_PERIODS, info->he_bss_color.enabled); if (ret) - ath12k_warn(ar->ab, "failed to set bss color collision on vdev %i: %d\n", + ath12k_warn(ar->ab, "failed to set bss color collision on vdev %u: %d\n", arvif->vdev_id, ret); + + param_id = WMI_VDEV_PARAM_BSS_COLOR; + if (info->he_bss_color.enabled) + param_value = info->he_bss_color.color << + IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET; + else + param_value = IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED; + + ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, + param_id, + param_value); + if (ret) + ath12k_warn(ar->ab, "failed to set bss color param on vdev %u: %d\n", + arvif->vdev_id, ret); + 
else + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "bss color param 0x%x set on vdev %u\n", + param_value, arvif->vdev_id); } else if (vif->type == NL80211_IFTYPE_STATION) { ret = ath12k_wmi_send_bss_color_change_enable_cmd(ar, arvif->vdev_id, @@ -13970,6 +14021,11 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT); + if (test_bit(WMI_TLV_SERVICE_BSS_COLOR_OFFLOAD, + ab->wmi_ab.svc_map)) { + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_BSS_COLOR); + ieee80211_hw_set(hw, DETECTS_COLOR_COLLISION); + } wiphy->cipher_suites = cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index e76275bd6916f..5075d86df36ff 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "core.h" #include "debugfs.h" #include "debug.h" @@ -190,6 +191,8 @@ static const struct ath12k_wmi_tlv_policy ath12k_wmi_tlv_policies[] = { .min_len = sizeof(struct wmi_11d_new_cc_event) }, [WMI_TAG_PER_CHAIN_RSSI_STATS] = { .min_len = sizeof(struct wmi_per_chain_rssi_stat_params) }, + [WMI_TAG_OBSS_COLOR_COLLISION_EVT] = { + .min_len = sizeof(struct wmi_obss_color_collision_event) }, }; __le32 ath12k_wmi_tlv_hdr(u32 cmd, u32 len) @@ -3850,6 +3853,58 @@ int ath12k_wmi_fils_discovery(struct ath12k *ar, u32 vdev_id, u32 interval, return ret; } +static void +ath12k_wmi_obss_color_collision_event(struct ath12k_base *ab, struct sk_buff *skb) +{ + const struct wmi_obss_color_collision_event *ev; + struct ath12k_link_vif *arvif; + u32 vdev_id, evt_type; + u64 bitmap; + + const void **tb __free(kfree) = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC); + if (IS_ERR(tb)) { + ath12k_warn(ab, "failed to parse OBSS 
color collision tlv %ld\n", + PTR_ERR(tb)); + return; + } + + ev = tb[WMI_TAG_OBSS_COLOR_COLLISION_EVT]; + if (!ev) { + ath12k_warn(ab, "failed to fetch OBSS color collision event\n"); + return; + } + + vdev_id = le32_to_cpu(ev->vdev_id); + evt_type = le32_to_cpu(ev->evt_type); + bitmap = le64_to_cpu(ev->obss_color_bitmap); + + guard(rcu)(); + + arvif = ath12k_mac_get_arvif_by_vdev_id(ab, vdev_id); + if (!arvif) { + ath12k_warn(ab, "no arvif found for vdev %u in OBSS color collision event\n", + vdev_id); + return; + } + + switch (evt_type) { + case WMI_BSS_COLOR_COLLISION_DETECTION: + ieee80211_obss_color_collision_notify(arvif->ahvif->vif, + bitmap, + arvif->link_id); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "obss color collision detected vdev %u event %d bitmap %016llx\n", + vdev_id, evt_type, bitmap); + break; + case WMI_BSS_COLOR_COLLISION_DISABLE: + case WMI_BSS_COLOR_FREE_SLOT_TIMER_EXPIRY: + case WMI_BSS_COLOR_FREE_SLOT_AVAILABLE: + break; + default: + ath12k_warn(ab, "unknown OBSS color collision event type %d\n", evt_type); + } +} + static void ath12k_fill_band_to_mac_param(struct ath12k_base *soc, struct ath12k_wmi_pdev_band_arg *arg) @@ -7014,12 +7069,26 @@ static void ath12k_vdev_start_resp_event(struct ath12k_base *ab, struct sk_buff static void ath12k_bcn_tx_status_event(struct ath12k_base *ab, struct sk_buff *skb) { + struct ath12k_link_vif *arvif; + struct ath12k *ar; u32 vdev_id, tx_status; if (ath12k_pull_bcn_tx_status_ev(ab, skb, &vdev_id, &tx_status) != 0) { ath12k_warn(ab, "failed to extract bcn tx status"); return; } + + guard(rcu)(); + + arvif = ath12k_mac_get_arvif_by_vdev_id(ab, vdev_id); + if (!arvif) { + ath12k_warn(ab, "invalid vdev %u in bcn tx status\n", + vdev_id); + return; + } + + ar = arvif->ar; + wiphy_work_queue(ath12k_ar_to_hw(ar)->wiphy, &arvif->bcn_tx_work); } static void ath12k_vdev_stopped_event(struct ath12k_base *ab, struct sk_buff *skb) @@ -9877,6 +9946,9 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff 
*skb) case WMI_PDEV_RSSI_DBM_CONVERSION_PARAMS_INFO_EVENTID: ath12k_wmi_rssi_dbm_conversion_params_info_event(ab, skb); break; + case WMI_OBSS_COLOR_COLLISION_DETECTION_EVENTID: + ath12k_wmi_obss_color_collision_event(ab, skb); + break; /* add Unsupported events (rare) here */ case WMI_TBTTOFFSET_EXT_UPDATE_EVENTID: case WMI_PEER_OPER_MODE_CHANGE_EVENTID: @@ -9887,7 +9959,6 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb) /* add Unsupported events (frequent) here */ case WMI_PDEV_GET_HALPHY_CAL_STATUS_EVENTID: case WMI_MGMT_RX_FW_CONSUMED_EVENTID: - case WMI_OBSS_COLOR_COLLISION_DETECTION_EVENTID: /* debug might flood hence silently ignore (no-op) */ break; case WMI_PDEV_UTF_EVENTID: diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 64bd968989c84..911ef9d528177 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4928,6 +4928,24 @@ struct wmi_obss_spatial_reuse_params_cmd { #define ATH12K_BSS_COLOR_STA_PERIODS 10000 #define ATH12K_BSS_COLOR_AP_PERIODS 5000 +/** + * enum wmi_bss_color_collision - Event types for BSS color collision handling + * @WMI_BSS_COLOR_COLLISION_DISABLE: Indicates that BSS color collision detection + * is disabled. + * @WMI_BSS_COLOR_COLLISION_DETECTION: Event triggered when a BSS color collision + * is detected. + * @WMI_BSS_COLOR_FREE_SLOT_TIMER_EXPIRY: Event indicating that the timer for waiting + * on a free BSS color slot has expired. + * @WMI_BSS_COLOR_FREE_SLOT_AVAILABLE: Event indicating that a free BSS color slot + * has become available. 
+ */ +enum wmi_bss_color_collision { + WMI_BSS_COLOR_COLLISION_DISABLE = 0, + WMI_BSS_COLOR_COLLISION_DETECTION, + WMI_BSS_COLOR_FREE_SLOT_TIMER_EXPIRY, + WMI_BSS_COLOR_FREE_SLOT_AVAILABLE, +}; + struct wmi_obss_color_collision_cfg_params_cmd { __le32 tlv_header; __le32 vdev_id; @@ -4945,6 +4963,12 @@ struct wmi_bss_color_change_enable_params_cmd { __le32 enable; } __packed; +struct wmi_obss_color_collision_event { + __le32 vdev_id; + __le32 evt_type; + __le64 obss_color_bitmap; +} __packed; + #define ATH12K_IPV4_TH_SEED_SIZE 5 #define ATH12K_IPV6_TH_SEED_SIZE 11 From a41281f6518e485220d180a6031d302a736fc463 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 17 Oct 2025 10:36:36 +0800 Subject: [PATCH 218/867] wifi: ath12k: restore register window after global reset The hardware target implements an address space larger than what the PCI BAR can map. In order to be able to access the whole target address space, the BAR space is split into 4 segments, of which the last 3, called windows, can be dynamically mapped to the desired area. This is achieved by updating the WINDOW_REG_ADDRESS register with the appropriate window value. Currently, each time a register beyond WINDOW_START is accessed, the host calculates the window value and caches it after the window update. This way, the next time a register falling in the same window is accessed, the host knows that the window is already correct, hence no additional update is needed. However, this mechanism breaks after a global reset is triggered in ath12k_pci_soc_global_reset(), because with a global reset the hardware resets the WINDOW_REG_ADDRESS register, hence the window is no longer properly mapped. Currently the host does nothing about this; as a result, a subsequent register access may not work as expected if it falls in the same window as before. Although no obvious issue is seen now, it is better to fix it to avoid future problems. The fix restores the window register after a global reset. 
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284.1-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251017-ath12k-reset-window-cache-v1-1-29e0e751deed@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/pci.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 48161db6af579..a12c8379cb466 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -218,6 +218,19 @@ static inline bool ath12k_pci_is_offset_within_mhi_region(u32 offset) return (offset >= PCI_MHIREGLEN_REG && offset <= PCI_MHI_REGION_END); } +static void ath12k_pci_restore_window(struct ath12k_base *ab) +{ + struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); + + spin_lock_bh(&ab_pci->window_lock); + + iowrite32(WINDOW_ENABLE_BIT | ab_pci->register_window, + ab->mem + WINDOW_REG_ADDRESS); + ioread32(ab->mem + WINDOW_REG_ADDRESS); + + spin_unlock_bh(&ab_pci->window_lock); +} + static void ath12k_pci_soc_global_reset(struct ath12k_base *ab) { u32 val, delay; @@ -242,6 +255,11 @@ static void ath12k_pci_soc_global_reset(struct ath12k_base *ab) val = ath12k_pci_read32(ab, PCIE_SOC_GLOBAL_RESET); if (val == 0xffffffff) ath12k_warn(ab, "link down error during global reset\n"); + + /* Restore window register as its content is cleared during + * hardware global reset, such that it aligns with host cache. 
+ */ + ath12k_pci_restore_window(ab); } static void ath12k_pci_clear_dbg_registers(struct ath12k_base *ab) From 197498315de711140bcc4722fdeb7c1761777100 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Tue, 21 Oct 2025 16:52:04 +0530 Subject: [PATCH 219/867] wifi: ath12k: Assert base_lock is held before allocating REO update element Add a lockdep assertion to verify that ab->base_lock is held prior to allocating a REO update element in ath12k_dp_prepare_reo_update_elem(). This helps detect potential concurrency issues during development and improves code robustness. Compiled tested only. Signed-off-by: Sarika Sharma Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251021112204.323242-1-sarika.sharma@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 6c9f0839c83a3..d28d8ffec0f83 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1089,6 +1089,8 @@ static int ath12k_dp_prepare_reo_update_elem(struct ath12k_dp *dp, { struct dp_reo_update_rx_queue_elem *elem; + lockdep_assert_held(&dp->ab->base_lock); + elem = kzalloc(sizeof(*elem), GFP_ATOMIC); if (!elem) return -ENOMEM; From c35c178fcdffa5f3bedf261a628769b9a52c2436 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:19 +0200 Subject: [PATCH 220/867] ice: enforce RTNL assumption of queue NAPI manipulation Instead of making assumptions in comments move them into code. Be also more precise, RTNL must be locked only when there is NAPI, and we have VSIs w/o NAPI that call ice_vsi_clear_napi_queues() during rmmod. 
Signed-off-by: Przemek Kitszel Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 4479c824561e9..69cb0381c4609 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2769,7 +2769,6 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) * @vsi: VSI pointer * * Associate queue[s] with napi for all vectors. - * The caller must hold rtnl_lock. */ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { @@ -2779,6 +2778,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) if (!netdev) return; + ASSERT_RTNL(); ice_for_each_rxq(vsi, q_idx) netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, &vsi->rx_rings[q_idx]->q_vector->napi); @@ -2799,7 +2799,6 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi) * @vsi: VSI pointer * * Clear the association between all VSI queues queue[s] and napi. - * The caller must hold rtnl_lock. */ void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) { @@ -2809,6 +2808,7 @@ void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) if (!netdev) return; + ASSERT_RTNL(); /* Clear the NAPI's interrupt number */ ice_for_each_q_vector(vsi, v_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; From 806c4f32a80627f8977fda53520afc41491d162f Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:20 +0200 Subject: [PATCH 221/867] ice: move service task start out of ice_init_pf() Move service task start out of ice_init_pf(). Do analogous with deinit. Service task is needed up to the very end of driver removal, later commit of the series will move it later on execution timeline. 
Signed-off-by: Przemek Kitszel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 22b8323ff0d0e..0e58a58c23eb3 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -1029,6 +1029,7 @@ int ice_open(struct net_device *netdev); int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf); +void ice_start_service_task(struct ice_pf *pf); int ice_load(struct ice_pf *pf); void ice_unload(struct ice_pf *pf); void ice_adv_lnk_speed_maps_init(void); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ca95b8800bb3b..f9e464b79bcaa 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3951,7 +3951,6 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) */ static void ice_deinit_pf(struct ice_pf *pf) { - ice_service_task_stop(pf); mutex_destroy(&pf->lag_mutex); mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); @@ -4030,6 +4029,14 @@ static void ice_set_pf_caps(struct ice_pf *pf) pf->max_pf_rxqs = func_caps->common_cap.num_rxq; } +void ice_start_service_task(struct ice_pf *pf) +{ + timer_setup(&pf->serv_tmr, ice_service_timer, 0); + pf->serv_tmr_period = HZ; + INIT_WORK(&pf->serv_task, ice_service_task); + clear_bit(ICE_SERVICE_SCHED, pf->state); +} + /** * ice_init_pf - Initialize general software structures (struct ice_pf) * @pf: board private structure to initialize @@ -4049,12 +4056,6 @@ static int ice_init_pf(struct ice_pf *pf) init_waitqueue_head(&pf->reset_wait_queue); - /* setup service timer and periodic service task */ - timer_setup(&pf->serv_tmr, 
ice_service_timer, 0); - pf->serv_tmr_period = HZ; - INIT_WORK(&pf->serv_task, ice_service_task); - clear_bit(ICE_SERVICE_SCHED, pf->state); - mutex_init(&pf->avail_q_mutex); pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); if (!pf->avail_txqs) @@ -4745,6 +4746,7 @@ int ice_init_dev(struct ice_pf *pf) ice_set_safe_mode_caps(hw); } + ice_start_service_task(pf); err = ice_init_pf(pf); if (err) { dev_err(dev, "ice_init_pf failed: %d\n", err); @@ -4791,6 +4793,7 @@ int ice_init_dev(struct ice_pf *pf) ice_clear_interrupt_scheme(pf); unroll_pf_init: ice_deinit_pf(pf); + ice_service_task_stop(pf); return err; } @@ -4799,6 +4802,7 @@ void ice_deinit_dev(struct ice_pf *pf) ice_free_irq_msix_misc(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); + ice_service_task_stop(pf); /* Service task is already stopped, so call reset directly. */ ice_reset(&pf->hw, ICE_RESET_PFR); From 2fe18288fce6b421f1dc585bcb9dd3afa32d3ad9 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:21 +0200 Subject: [PATCH 222/867] ice: move ice_init_interrupt_scheme() prior ice_init_pf() Move ice_init_interrupt_scheme() prior ice_init_pf(). To enable the move ice_set_pf_caps() was moved out from ice_init_pf() to the caller (ice_init_dev()), and placed prior to the irq scheme init. The move makes deinit order of ice_deinit_dev() and failure-path of ice_init_pf() match (at least in terms of not calling ice_clear_interrupt_scheme() and ice_deinit_pf() in opposite ways). The new order aligns with findings made by Jakub Buchocki in the commit 24b454bc354a ("ice: Fix ice module unload"). 
Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 25 ++++++++++------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f9e464b79bcaa..e00c282a8c18f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4043,8 +4043,6 @@ void ice_start_service_task(struct ice_pf *pf) */ static int ice_init_pf(struct ice_pf *pf) { - ice_set_pf_caps(pf); - mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); mutex_init(&pf->adev_mutex); @@ -4746,11 +4744,18 @@ int ice_init_dev(struct ice_pf *pf) ice_set_safe_mode_caps(hw); } + ice_set_pf_caps(pf); + err = ice_init_interrupt_scheme(pf); + if (err) { + dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); + return -EIO; + } + ice_start_service_task(pf); err = ice_init_pf(pf); if (err) { dev_err(dev, "ice_init_pf failed: %d\n", err); - return err; + goto unroll_irq_scheme_init; } pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; @@ -4768,14 +4773,6 @@ int ice_init_dev(struct ice_pf *pf) pf->hw.udp_tunnel_nic.tables[1].tunnel_types = UDP_TUNNEL_TYPE_GENEVE; } - - err = ice_init_interrupt_scheme(pf); - if (err) { - dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); - err = -EIO; - goto unroll_pf_init; - } - /* In case of MSIX we are going to setup the misc vector right here * to handle admin queue events etc. 
In case of legacy and MSI * the misc functionality and queue processing is combined in @@ -4784,16 +4781,16 @@ int ice_init_dev(struct ice_pf *pf) err = ice_req_irq_msix_misc(pf); if (err) { dev_err(dev, "setup of misc vector failed: %d\n", err); - goto unroll_irq_scheme_init; + goto unroll_pf_init; } return 0; -unroll_irq_scheme_init: - ice_clear_interrupt_scheme(pf); unroll_pf_init: ice_deinit_pf(pf); +unroll_irq_scheme_init: ice_service_task_stop(pf); + ice_clear_interrupt_scheme(pf); return err; } From 71430451f81bd6550e46d89b69103a111fc42982 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:22 +0200 Subject: [PATCH 223/867] ice: ice_init_pf: destroy mutexes and xarrays on memory alloc failure Unroll actions of ice_init_pf() when it fails. ice_deinit_pf() happens to be perfect to call here. Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 31 +++++++++-------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e00c282a8c18f..09dee43e48aab 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3951,6 +3951,8 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) */ static void ice_deinit_pf(struct ice_pf *pf) { + /* note that we unroll also on ice_init_pf() failure here */ + mutex_destroy(&pf->lag_mutex); mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); @@ -4055,25 +4057,6 @@ static int ice_init_pf(struct ice_pf *pf) init_waitqueue_head(&pf->reset_wait_queue); mutex_init(&pf->avail_q_mutex); - pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); - if (!pf->avail_txqs) - return -ENOMEM; - - pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); - if (!pf->avail_rxqs) { - bitmap_free(pf->avail_txqs); - pf->avail_txqs = 
NULL; - return -ENOMEM; - } - - pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); - if (!pf->txtime_txqs) { - bitmap_free(pf->avail_txqs); - pf->avail_txqs = NULL; - bitmap_free(pf->avail_rxqs); - pf->avail_rxqs = NULL; - return -ENOMEM; - } mutex_init(&pf->vfs.table_lock); hash_init(pf->vfs.table); @@ -4086,7 +4069,17 @@ static int ice_init_pf(struct ice_pf *pf) xa_init(&pf->dyn_ports); xa_init(&pf->sf_nums); + pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); + pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + if (!pf->avail_txqs || !pf->avail_rxqs || !pf->txtime_txqs) + goto undo_init; + return 0; +undo_init: + /* deinit handles half-initialized pf just fine */ + ice_deinit_pf(pf); + return -ENOMEM; } /** From e3bf1cdde7471bab7fc20dd1a37c2cdb82d3f76b Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:23 +0200 Subject: [PATCH 224/867] ice: move udp_tunnel_nic and misc IRQ setup into ice_init_pf() Move udp_tunnel_nic setup and ice_req_irq_msix_misc() call into ice_init_pf(), remove some redundancy in the former while moving. Move ice_free_irq_msix_misc() call into ice_deinit_pf(), to mimic the above in terms of needed cleanup. Guard it via emptiness check, to keep the allowance of half-initialized pf being cleaned up. 
Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 58 +++++++++++------------ 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 09dee43e48aab..1691dda1067ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3978,6 +3978,9 @@ static void ice_deinit_pf(struct ice_pf *pf) if (pf->ptp.clock) ptp_clock_unregister(pf->ptp.clock); + if (!xa_empty(&pf->irq_tracker.entries)) + ice_free_irq_msix_misc(pf); + xa_destroy(&pf->dyn_ports); xa_destroy(&pf->sf_nums); } @@ -4045,6 +4048,11 @@ void ice_start_service_task(struct ice_pf *pf) */ static int ice_init_pf(struct ice_pf *pf) { + struct udp_tunnel_nic_info *udp_tunnel_nic = &pf->hw.udp_tunnel_nic; + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + int err = -ENOMEM; + mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); mutex_init(&pf->adev_mutex); @@ -4075,11 +4083,30 @@ static int ice_init_pf(struct ice_pf *pf) if (!pf->avail_txqs || !pf->avail_rxqs || !pf->txtime_txqs) goto undo_init; + udp_tunnel_nic->set_port = ice_udp_tunnel_set_port; + udp_tunnel_nic->unset_port = ice_udp_tunnel_unset_port; + udp_tunnel_nic->shared = &hw->udp_tunnel_shared; + udp_tunnel_nic->tables[0].n_entries = hw->tnl.valid_count[TNL_VXLAN]; + udp_tunnel_nic->tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; + udp_tunnel_nic->tables[1].n_entries = hw->tnl.valid_count[TNL_GENEVE]; + udp_tunnel_nic->tables[1].tunnel_types = UDP_TUNNEL_TYPE_GENEVE; + + /* In case of MSIX we are going to setup the misc vector right here + * to handle admin queue events etc. In case of legacy and MSI + * the misc functionality and queue processing is combined in + * the same vector and that gets setup at open. 
+ */ + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(dev, "setup of misc vector failed: %d\n", err); + goto undo_init; + } + return 0; undo_init: /* deinit handles half-initialized pf just fine */ ice_deinit_pf(pf); - return -ENOMEM; + return err; } /** @@ -4751,36 +4778,8 @@ int ice_init_dev(struct ice_pf *pf) goto unroll_irq_scheme_init; } - pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; - pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; - pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; - if (pf->hw.tnl.valid_count[TNL_VXLAN]) { - pf->hw.udp_tunnel_nic.tables[0].n_entries = - pf->hw.tnl.valid_count[TNL_VXLAN]; - pf->hw.udp_tunnel_nic.tables[0].tunnel_types = - UDP_TUNNEL_TYPE_VXLAN; - } - if (pf->hw.tnl.valid_count[TNL_GENEVE]) { - pf->hw.udp_tunnel_nic.tables[1].n_entries = - pf->hw.tnl.valid_count[TNL_GENEVE]; - pf->hw.udp_tunnel_nic.tables[1].tunnel_types = - UDP_TUNNEL_TYPE_GENEVE; - } - /* In case of MSIX we are going to setup the misc vector right here - * to handle admin queue events etc. In case of legacy and MSI - * the misc functionality and queue processing is combined in - * the same vector and that gets setup at open. - */ - err = ice_req_irq_msix_misc(pf); - if (err) { - dev_err(dev, "setup of misc vector failed: %d\n", err); - goto unroll_pf_init; - } - return 0; -unroll_pf_init: - ice_deinit_pf(pf); unroll_irq_scheme_init: ice_service_task_stop(pf); ice_clear_interrupt_scheme(pf); @@ -4789,7 +4788,6 @@ int ice_init_dev(struct ice_pf *pf) void ice_deinit_dev(struct ice_pf *pf) { - ice_free_irq_msix_misc(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); ice_service_task_stop(pf); From ef825bdb4605742c4efc03f67d930e80c42f33cb Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:24 +0200 Subject: [PATCH 225/867] ice: move ice_init_pf() out of ice_init_dev() Move ice_init_pf() out of ice_init_dev(). Do the same for deinit counterpart. 
Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/devlink/devlink.c | 16 ++++++++-- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 32 +++++++++---------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index fb2de521731ae..c354a03c950cd 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -459,6 +459,7 @@ static void ice_devlink_reinit_down(struct ice_pf *pf) rtnl_lock(); ice_vsi_decfg(ice_get_main_vsi(pf)); rtnl_unlock(); + ice_deinit_pf(pf); ice_deinit_dev(pf); } @@ -1231,11 +1232,12 @@ static void ice_set_min_max_msix(struct ice_pf *pf) static int ice_devlink_reinit_up(struct ice_pf *pf) { struct ice_vsi *vsi = ice_get_main_vsi(pf); + struct device *dev = ice_pf_to_dev(pf); int err; err = ice_init_hw(&pf->hw); if (err) { - dev_err(ice_pf_to_dev(pf), "ice_init_hw failed: %d\n", err); + dev_err(dev, "ice_init_hw failed: %d\n", err); return err; } @@ -1246,13 +1248,19 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) if (err) goto unroll_hw_init; + err = ice_init_pf(pf); + if (err) { + dev_err(dev, "ice_init_pf failed: %d\n", err); + goto unroll_dev_init; + } + vsi->flags = ICE_VSI_FLAG_INIT; rtnl_lock(); err = ice_vsi_cfg(vsi); rtnl_unlock(); if (err) - goto err_vsi_cfg; + goto unroll_pf_init; /* No need to take devl_lock, it's already taken by devlink API */ err = ice_load(pf); @@ -1265,7 +1273,9 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) rtnl_lock(); ice_vsi_decfg(vsi); rtnl_unlock(); -err_vsi_cfg: +unroll_pf_init: + ice_deinit_pf(pf); +unroll_dev_init: ice_deinit_dev(pf); unroll_hw_init: ice_deinit_hw(&pf->hw); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h 
index 0e58a58c23eb3..9a1abd4573372 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -1035,6 +1035,8 @@ void ice_unload(struct ice_pf *pf); void ice_adv_lnk_speed_maps_init(void); int ice_init_dev(struct ice_pf *pf); void ice_deinit_dev(struct ice_pf *pf); +int ice_init_pf(struct ice_pf *pf); +void ice_deinit_pf(struct ice_pf *pf); int ice_change_mtu(struct net_device *netdev, int new_mtu); void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue); int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1691dda1067ed..a4acc42fabab4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3949,7 +3949,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) * ice_deinit_pf - Unrolls initialziations done by ice_init_pf * @pf: board private structure to initialize */ -static void ice_deinit_pf(struct ice_pf *pf) +void ice_deinit_pf(struct ice_pf *pf) { /* note that we unroll also on ice_init_pf() failure here */ @@ -4045,8 +4045,9 @@ void ice_start_service_task(struct ice_pf *pf) /** * ice_init_pf - Initialize general software structures (struct ice_pf) * @pf: board private structure to initialize + * Return: 0 on success, negative errno otherwise. 
*/ -static int ice_init_pf(struct ice_pf *pf) +int ice_init_pf(struct ice_pf *pf) { struct udp_tunnel_nic_info *udp_tunnel_nic = &pf->hw.udp_tunnel_nic; struct device *dev = ice_pf_to_dev(pf); @@ -4772,23 +4773,12 @@ int ice_init_dev(struct ice_pf *pf) } ice_start_service_task(pf); - err = ice_init_pf(pf); - if (err) { - dev_err(dev, "ice_init_pf failed: %d\n", err); - goto unroll_irq_scheme_init; - } return 0; - -unroll_irq_scheme_init: - ice_service_task_stop(pf); - ice_clear_interrupt_scheme(pf); - return err; } void ice_deinit_dev(struct ice_pf *pf) { - ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); ice_service_task_stop(pf); @@ -5030,21 +5020,28 @@ static void ice_deinit_devlink(struct ice_pf *pf) static int ice_init(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); int err; err = ice_init_dev(pf); if (err) return err; + err = ice_init_pf(pf); + if (err) { + dev_err(dev, "ice_init_pf failed: %d\n", err); + goto unroll_dev_init; + } + if (pf->hw.mac_type == ICE_MAC_E830) { err = pci_enable_ptm(pf->pdev, NULL); if (err) - dev_dbg(ice_pf_to_dev(pf), "PCIe PTM not supported by PCIe bus/controller\n"); + dev_dbg(dev, "PCIe PTM not supported by PCIe bus/controller\n"); } err = ice_alloc_vsis(pf); if (err) - goto err_alloc_vsis; + goto unroll_pf_init; err = ice_init_pf_sw(pf); if (err) @@ -5081,7 +5078,9 @@ static int ice_init(struct ice_pf *pf) ice_deinit_pf_sw(pf); err_init_pf_sw: ice_dealloc_vsis(pf); -err_alloc_vsis: +unroll_pf_init: + ice_deinit_pf(pf); +unroll_dev_init: ice_deinit_dev(pf); return err; } @@ -5093,6 +5092,7 @@ static void ice_deinit(struct ice_pf *pf) ice_deinit_pf_sw(pf); ice_dealloc_vsis(pf); + ice_deinit_pf(pf); ice_deinit_dev(pf); } From c2fb9398f73d41cb2b5da74ff505578525ee3fd8 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:25 +0200 Subject: [PATCH 226/867] ice: extract ice_init_dev() from ice_init() Extract ice_init_dev() from ice_init(), to allow service task and IRQ scheme teardown to be put after 
clearing SW constructs in the subsequent commit. Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a4acc42fabab4..9a817c3c8b99d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5023,14 +5023,10 @@ static int ice_init(struct ice_pf *pf) struct device *dev = ice_pf_to_dev(pf); int err; - err = ice_init_dev(pf); - if (err) - return err; - err = ice_init_pf(pf); if (err) { dev_err(dev, "ice_init_pf failed: %d\n", err); - goto unroll_dev_init; + return err; } if (pf->hw.mac_type == ICE_MAC_E830) { @@ -5080,8 +5076,6 @@ static int ice_init(struct ice_pf *pf) ice_dealloc_vsis(pf); unroll_pf_init: ice_deinit_pf(pf); -unroll_dev_init: - ice_deinit_dev(pf); return err; } @@ -5093,7 +5087,6 @@ static void ice_deinit(struct ice_pf *pf) ice_deinit_pf_sw(pf); ice_dealloc_vsis(pf); ice_deinit_pf(pf); - ice_deinit_dev(pf); } /** @@ -5323,10 +5316,14 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) } pf->adapter = adapter; - err = ice_init(pf); + err = ice_init_dev(pf); if (err) goto unroll_adapter; + err = ice_init(pf); + if (err) + goto unroll_dev_init; + devl_lock(priv_to_devlink(pf)); err = ice_load(pf); if (err) @@ -5344,6 +5341,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) unroll_init: devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); +unroll_dev_init: + ice_deinit_dev(pf); unroll_adapter: ice_adapter_put(pdev); unroll_hw_init: @@ -5457,6 +5456,7 @@ static void ice_remove(struct pci_dev *pdev) devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); + ice_deinit_dev(pf); ice_vsi_release_all(pf); ice_setup_mc_magic_wake(pf); From 
8a37f9e2ff40f4a4fa8def22febefe4daf58e573 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:26 +0200 Subject: [PATCH 227/867] ice: move ice_deinit_dev() to the end of deinit paths ice_deinit_dev() takes care of turning off adminq processing, which is much needed during driver teardown (remove, reset, error path). Move it to the very end where applicable. For example, ice_deinit_hw() called after adminq deinit slows rmmod on my two-card setup by about 60 seconds. ice_init_dev() and ice_deinit_dev() scopes were reduced by previous commits of the series, with a final touch of extracting ice_init_dev_hw() out now (there is no deinit counterpart). Note that removed ice_service_task_stop() call from ice_remove() is placed in the ice_deinit_dev() (and stopping twice makes no sense). Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/devlink/devlink.c | 5 +++- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_common.c | 3 +++ drivers/net/ethernet/intel/ice/ice_main.c | 23 ++++++++++++------- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index c354a03c950cd..938914abbe066 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1233,6 +1233,7 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) { struct ice_vsi *vsi = ice_get_main_vsi(pf); struct device *dev = ice_pf_to_dev(pf); + bool need_dev_deinit = false; int err; err = ice_init_hw(&pf->hw); @@ -1276,9 +1277,11 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) unroll_pf_init: ice_deinit_pf(pf); unroll_dev_init: - ice_deinit_dev(pf); + need_dev_deinit = true; unroll_hw_init: ice_deinit_hw(&pf->hw); + if (need_dev_deinit) + ice_deinit_dev(pf); return err; } diff 
--git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 9a1abd4573372..9ee596773f34e 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -1033,6 +1033,7 @@ void ice_start_service_task(struct ice_pf *pf); int ice_load(struct ice_pf *pf); void ice_unload(struct ice_pf *pf); void ice_adv_lnk_speed_maps_init(void); +void ice_init_dev_hw(struct ice_pf *pf); int ice_init_dev(struct ice_pf *pf); void ice_deinit_dev(struct ice_pf *pf); int ice_init_pf(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2250426ec91b4..b097cc8b175cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1161,6 +1161,9 @@ int ice_init_hw(struct ice_hw *hw) status = ice_init_hw_tbls(hw); if (status) goto err_unroll_fltr_mgmt_struct; + + ice_init_dev_hw(hw->back); + mutex_init(&hw->tnl_lock); ice_init_chk_recipe_reuse_support(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9a817c3c8b99d..a1fe2d363adb0 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4742,9 +4742,8 @@ static void ice_decfg_netdev(struct ice_vsi *vsi) vsi->netdev = NULL; } -int ice_init_dev(struct ice_pf *pf) +void ice_init_dev_hw(struct ice_pf *pf) { - struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int err; @@ -4764,6 +4763,12 @@ int ice_init_dev(struct ice_pf *pf) */ ice_set_safe_mode_caps(hw); } +} + +int ice_init_dev(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; ice_set_pf_caps(pf); err = ice_init_interrupt_scheme(pf); @@ -5220,6 +5225,7 @@ static int ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { struct device *dev = &pdev->dev; + bool need_dev_deinit = false; struct ice_adapter *adapter; struct ice_pf *pf; struct 
ice_hw *hw; @@ -5342,11 +5348,13 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); unroll_dev_init: - ice_deinit_dev(pf); + need_dev_deinit = true; unroll_adapter: ice_adapter_put(pdev); unroll_hw_init: ice_deinit_hw(hw); + if (need_dev_deinit) + ice_deinit_dev(pf); return err; } @@ -5441,10 +5449,6 @@ static void ice_remove(struct pci_dev *pdev) ice_hwmon_exit(pf); - ice_service_task_stop(pf); - ice_aq_cancel_waiting_tasks(pf); - set_bit(ICE_DOWN, pf->state); - if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); @@ -5456,13 +5460,16 @@ static void ice_remove(struct pci_dev *pdev) devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); - ice_deinit_dev(pf); ice_vsi_release_all(pf); ice_setup_mc_magic_wake(pf); ice_set_wake(pf); ice_adapter_put(pdev); + + ice_deinit_dev(pf); + ice_aq_cancel_waiting_tasks(pf); + set_bit(ICE_DOWN, pf->state); } /** From 1390b8b3d2bef9bfbb852fc735430798bfca36e7 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 12 Sep 2025 15:06:27 +0200 Subject: [PATCH 228/867] ice: remove duplicate call to ice_deinit_hw() on error paths Current unwinding code on error paths of ice_devlink_reinit_up() and ice_probe() has a manual call to ice_deinit_hw() (which is good, as there is also a manual call to ice_init_hw() there), which is then duplicated (and was prior to the current series) in ice_deinit_dev(). Fix the above by removing ice_deinit_hw() from ice_deinit_dev(). Add a (now missing) call in ice_remove(). 
Reported-by: Jacob Keller Link: https://lore.kernel.org/intel-wired-lan/20250717-jk-ddp-safe-mode-issue-v1-1-e113b2baed79@intel.com/ Fixes: 4d3f59bfa2cd ("ice: split ice_init_hw() out from ice_init_dev()") Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1fe2d363adb0..1de3da7b3907d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4784,7 +4784,6 @@ int ice_init_dev(struct ice_pf *pf) void ice_deinit_dev(struct ice_pf *pf) { - ice_deinit_hw(&pf->hw); ice_service_task_stop(pf); /* Service task is already stopped, so call reset directly. */ @@ -5466,6 +5465,7 @@ static void ice_remove(struct pci_dev *pdev) ice_set_wake(pf); ice_adapter_put(pdev); + ice_deinit_hw(&pf->hw); ice_deinit_dev(pf); ice_aq_cancel_waiting_tasks(pf); From 06d6322280d95757cef1e3ee0fc62c644629523e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 22 Oct 2025 05:39:45 +0000 Subject: [PATCH 229/867] neighbour: Use RCU list helpers for neigh_parms.list writers. We will convert RTM_GETNEIGHTBL to RCU soon, where we traverse tbl->parms_list under RCU in neightbl_dump_info(). Let's use RCU list helper for neigh_parms in neigh_parms_alloc() and neigh_parms_release(). neigh_table_init() uses the plain list_add() for the default neigh_parm that is embedded in the table and not yet published. Note that neigh_parms_release() already uses call_rcu() to free neigh_parms. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251022054004.2514876-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bddfa389effa7..98428f60731bd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1764,7 +1764,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, } write_lock_bh(&tbl->lock); - list_add(&p->list, &tbl->parms.list); + list_add_rcu(&p->list, &tbl->parms.list); write_unlock_bh(&tbl->lock); neigh_parms_data_state_cleanall(p); @@ -1786,7 +1786,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) if (!parms || parms == &tbl->parms) return; write_lock_bh(&tbl->lock); - list_del(&parms->list); + list_del_rcu(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); netdev_put(parms->dev, &parms->dev_tracker); From 35d7c70870338aa6a367b9e4ed528914320b0be0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 22 Oct 2025 05:39:46 +0000 Subject: [PATCH 230/867] neighbour: Annotate access to neigh_parms fields. NEIGH_VAR() is read locklessly in the fast path, and IPv6 ndisc uses NEIGH_VAR_SET() locklessly. The next patch will convert neightbl_dump_info() to RCU. Let's annotate accesses to neigh_param with READ_ONCE() and WRITE_ONCE(). Note that ndisc_ifinfo_sysctl_change() uses &NEIGH_VAR() and we cannot use '&' with READ_ONCE(), so NEIGH_VAR_PTR() is introduced. Note also that NEIGH_VAR_INIT() does not need WRITE_ONCE() as it is before parms is published. Also, the only user hippi_neigh_setup_dev() is no longer called since commit e3804cbebb67 ("net: remove COMPAT_NET_DEV_OPS"), which looks wrong, but probably no one uses HIPPI and RoadRunner. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251022054004.2514876-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 15 ++++++++++++--- net/core/neighbour.c | 17 ++++++----------- net/ipv6/ndisc.c | 8 ++++---- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4a30bd458c5a9..998ff9eccebb7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -92,15 +92,17 @@ struct neigh_parms { static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { set_bit(index, p->data_state); - p->data[index] = val; + WRITE_ONCE(p->data[index], val); } -#define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define __NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define NEIGH_VAR(p, attr) READ_ONCE(__NEIGH_VAR(p, attr)) +#define NEIGH_VAR_PTR(p, attr) (&(__NEIGH_VAR(p, attr))) /* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. * In other cases, NEIGH_VAR_SET should be used. 
*/ -#define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) +#define NEIGH_VAR_INIT(p, attr, val) (__NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) @@ -378,6 +380,13 @@ struct net *neigh_parms_net(const struct neigh_parms *parms) unsigned long neigh_rand_reach_time(unsigned long base); +static inline void neigh_set_reach_time(struct neigh_parms *p) +{ + unsigned long base = NEIGH_VAR(p, BASE_REACHABLE_TIME); + + WRITE_ONCE(p->reachable_time, neigh_rand_reach_time(base)); +} + void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 98428f60731bd..5bbebbfcba43b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -995,8 +995,7 @@ static void neigh_periodic_work(struct work_struct *work) WRITE_ONCE(tbl->last_rand, jiffies); list_for_each_entry(p, &tbl->parms_list, list) - p->reachable_time = - neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + neigh_set_reach_time(p); } if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1)) @@ -1749,8 +1748,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { p->tbl = tbl; refcount_set(&p->refcnt, 1); - p->reachable_time = - neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + neigh_set_reach_time(p); p->qlen = 0; netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; @@ -1810,8 +1808,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); refcount_set(&tbl->parms.refcnt, 1); - tbl->parms.reachable_time = - neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); + neigh_set_reach_time(&tbl->parms); tbl->parms.qlen = 0; tbl->stats = alloc_percpu(struct 
neigh_statistics); @@ -2194,7 +2191,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_REPROBES, NEIGH_VAR(parms, MCAST_REPROBES)) || - nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time, + nla_put_msecs(skb, NDTPA_REACHABLE_TIME, READ_ONCE(parms->reachable_time), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) || @@ -2475,8 +2472,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, * only be effective after the next time neigh_periodic_work * decides to recompute it (can be multiple minutes) */ - p->reachable_time = - neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + neigh_set_reach_time(p); break; case NDTPA_GC_STALETIME: NEIGH_VAR_SET(p, GC_STALETIME, @@ -3721,8 +3717,7 @@ static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write * only be effective after the next time neigh_periodic_work * decides to recompute it */ - p->reachable_time = - neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + neigh_set_reach_time(p); } return ret; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f427e41e9c49b..59d17b6f06bfd 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1449,7 +1449,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) BASE_REACHABLE_TIME, rtime); NEIGH_VAR_SET(in6_dev->nd_parms, GC_STALETIME, 3 * rtime); - in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); + neigh_set_reach_time(in6_dev->nd_parms); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } @@ -1948,9 +1948,9 @@ int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buf ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) - idev->nd_parms->reachable_time = - 
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); + if (ctl->data == NEIGH_VAR_PTR(idev->nd_parms, BASE_REACHABLE_TIME)) + neigh_set_reach_time(idev->nd_parms); + WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); From 4ae34be500649ec452ac1fc2748958683ad9b55d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 22 Oct 2025 05:39:47 +0000 Subject: [PATCH 231/867] neighbour: Convert RTM_GETNEIGHTBL to RCU. neightbl_dump_info() calls these functions for each neigh_tables[] entry: 1. neightbl_fill_info() for tbl->parms 2. neightbl_fill_param_info() for tbl->parms_list (except tbl->parms) Both functions rely on the table lock (read_lock_bh(&tbl->lock)) and RTNL is not needed. Let's fetch the table under RCU and convert RTM_GETNEIGHTBL to RCU. Note that the first entry of tbl->parms_list is tbl->parms.list and embedded in neigh_table, so list_next_entry() is safe. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251022054004.2514876-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5bbebbfcba43b..a660723476fd1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2176,7 +2176,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) return -ENOBUFS; if ((parms->dev && - nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || + nla_put_u32(skb, NDTPA_IFINDEX, READ_ONCE(parms->dev->ifindex))) || nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) || nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || @@ -2228,8 +2228,6 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); - - read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; 
ndtmsg->ndtm_pad2 = 0; @@ -2255,11 +2253,9 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen), }; - rcu_read_lock(); nht = rcu_dereference(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); - rcu_read_unlock(); if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) goto nla_put_failure; @@ -2297,12 +2293,10 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, if (neightbl_fill_parms(skb, &tbl->parms) < 0) goto nla_put_failure; - read_unlock_bh(&tbl->lock); nlmsg_end(skb, nlh); return 0; nla_put_failure: - read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -2321,8 +2315,6 @@ static int neightbl_fill_param_info(struct sk_buff *skb, return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); - - read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; @@ -2331,11 +2323,9 @@ static int neightbl_fill_param_info(struct sk_buff *skb, neightbl_fill_parms(skb, parms) < 0) goto errout; - read_unlock_bh(&tbl->lock); nlmsg_end(skb, nlh); return 0; errout: - read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -2575,10 +2565,12 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; + rcu_read_lock(); + for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { struct neigh_parms *p; - tbl = rcu_dereference_rtnl(neigh_tables[tidx]); + tbl = rcu_dereference(neigh_tables[tidx]); if (!tbl) continue; @@ -2592,7 +2584,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) nidx = 0; p = list_next_entry(&tbl->parms, list); - list_for_each_entry_from(p, &tbl->parms_list, list) { + list_for_each_entry_from_rcu(p, &tbl->parms_list, list) { if (!net_eq(neigh_parms_net(p), net)) continue; @@ -2612,6 +2604,8 @@ static int neightbl_dump_info(struct sk_buff 
*skb, struct netlink_callback *cb) neigh_skip = 0; } out: + rcu_read_unlock(); + cb->args[0] = tidx; cb->args[1] = nidx; @@ -3913,7 +3907,8 @@ static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_DELNEIGH, .doit = neigh_delete}, {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, - {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info}, + {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set}, }; From 55a6046b48a8c7d3ea164d9b68902b4ea6930cf3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 22 Oct 2025 05:39:48 +0000 Subject: [PATCH 232/867] neighbour: Convert RTM_SETNEIGHTBL to RCU. neightbl_set() fetches neigh_tables[] and updates attributes under write_lock_bh(&tbl->lock), so RTNL is not needed. neigh_table_clear() synchronises RCU only, and rcu_dereference_rtnl() protects nothing here. If we released RCU after fetching neigh_tables[], there would be no synchronisation to block neigh_table_clear() further, so RCU is held until the end of the function. Another option would be to protect neigh_tables[] user with SRCU and add synchronize_srcu() in neigh_table_clear(). But, holding RCU should be fine as we hold write_lock_bh() for the rest of neightbl_set() anyway. Let's perform RTM_SETNEIGHTBL under RCU and drop RTNL. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251022054004.2514876-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a660723476fd1..6d2164b4d9998 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2362,9 +2362,9 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct nlattr *tb[NDTA_MAX + 1]; struct neigh_table *tbl; struct ndtmsg *ndtmsg; - struct nlattr *tb[NDTA_MAX+1]; bool found = false; int err, tidx; @@ -2380,20 +2380,27 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, ndtmsg = nlmsg_data(nlh); + rcu_read_lock(); + for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { - tbl = rcu_dereference_rtnl(neigh_tables[tidx]); + tbl = rcu_dereference(neigh_tables[tidx]); if (!tbl) continue; + if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; + if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { found = true; break; } } - if (!found) - return -ENOENT; + if (!found) { + rcu_read_unlock(); + err = -ENOENT; + goto errout; + } /* * We acquire tbl->lock to be nice to the periodic timers and @@ -2519,6 +2526,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, errout_tbl_lock: write_unlock_bh(&tbl->lock); + rcu_read_unlock(); errout: return err; } @@ -3909,7 +3917,8 @@ static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = { .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info, .flags = RTNL_FLAG_DUMP_UNLOCKED}, - {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set}, + {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, }; static int __init neigh_init(void) From 
3064d0fe02af23a3956d2b690461abb44da88cf4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 22 Oct 2025 05:39:49 +0000 Subject: [PATCH 233/867] neighbour: Convert rwlock of struct neigh_table to spinlock. Only neigh_for_each() and neigh_seq_start/stop() are on the reader side of neigh_table.lock. Let's convert rwlock to the plain spinlock. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251022054004.2514876-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 2 +- net/atm/clip.c | 4 +-- net/core/neighbour.c | 68 +++++++++++++++++++++-------------------- net/ipv4/arp.c | 4 +-- 4 files changed, 40 insertions(+), 38 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 998ff9eccebb7..2dfee6d4258af 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -238,7 +238,7 @@ struct neigh_table { atomic_t gc_entries; struct list_head gc_list; struct list_head managed_list; - rwlock_t lock; + spinlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; diff --git a/net/atm/clip.c b/net/atm/clip.c index f7a5565e794ef..8f152e5fa6594 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -168,10 +168,10 @@ static int neigh_check_cb(struct neighbour *n) static void idle_timer_check(struct timer_list *unused) { - write_lock(&arp_tbl.lock); + spin_lock(&arp_tbl.lock); __neigh_for_each_release(&arp_tbl, neigh_check_cb); mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - write_unlock(&arp_tbl.lock); + spin_unlock(&arp_tbl.lock); } static int clip_arp_rcv(struct sk_buff *skb) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6d2164b4d9998..96a3b1a93252a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -81,7 +81,7 @@ static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family } /* - Neighbour hash table buckets are protected with rwlock tbl->lock. 
+ Neighbour hash table buckets are protected with tbl->lock. - All the scans/updates to hash buckets MUST be made under this lock. - NOTHING clever should be made under this lock: no callbacks @@ -149,7 +149,7 @@ static void neigh_update_gc_list(struct neighbour *n) { bool on_gc_list, exempt_from_gc; - write_lock_bh(&n->tbl->lock); + spin_lock_bh(&n->tbl->lock); write_lock(&n->lock); if (n->dead) goto out; @@ -172,14 +172,14 @@ static void neigh_update_gc_list(struct neighbour *n) } out: write_unlock(&n->lock); - write_unlock_bh(&n->tbl->lock); + spin_unlock_bh(&n->tbl->lock); } static void neigh_update_managed_list(struct neighbour *n) { bool on_managed_list, add_to_managed; - write_lock_bh(&n->tbl->lock); + spin_lock_bh(&n->tbl->lock); write_lock(&n->lock); if (n->dead) goto out; @@ -193,7 +193,7 @@ static void neigh_update_managed_list(struct neighbour *n) list_add_tail(&n->managed_list, &n->tbl->managed_list); out: write_unlock(&n->lock); - write_unlock_bh(&n->tbl->lock); + spin_unlock_bh(&n->tbl->lock); } static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, @@ -263,7 +263,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) { if (refcount_read(&n->refcnt) == 1) { @@ -292,7 +292,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) WRITE_ONCE(tbl->last_flush, jiffies); unlock: - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); return shrunk; } @@ -454,23 +454,23 @@ static void neigh_flush_table(struct neigh_table *tbl) void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) { - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, false); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); } EXPORT_SYMBOL(neigh_changeaddr); static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) 
{ - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); if (likely(dev)) { neigh_flush_dev(tbl, dev, skip_perm); } else { DEBUG_NET_WARN_ON_ONCE(skip_perm); neigh_flush_table(tbl); } - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); pneigh_ifdown(tbl, dev, skip_perm); pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL, @@ -687,7 +687,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); @@ -722,13 +722,13 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, hlist_add_head_rcu(&n->dev_list, neigh_get_dev_table(dev, tbl->family)); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; out_tbl_unlock: - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); out_neigh_release: if (!exempt_from_gc) atomic_dec(&tbl->gc_entries); @@ -982,7 +982,7 @@ static void neigh_periodic_work(struct work_struct *work) NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); @@ -1036,9 +1036,9 @@ static void neigh_periodic_work(struct work_struct *work) * It's fine to release lock here, even if hash table * grows while we are preempted. 
*/ - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); cond_resched(); - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); } @@ -1049,7 +1049,7 @@ static void neigh_periodic_work(struct work_struct *work) */ queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); } static __inline__ int neigh_max_probes(struct neighbour *n) @@ -1641,12 +1641,12 @@ static void neigh_managed_work(struct work_struct *work) managed_work.work); struct neighbour *neigh; - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); list_for_each_entry(neigh, &tbl->managed_list, managed_list) neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS)); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); } static void neigh_proxy_process(struct timer_list *t) @@ -1761,9 +1761,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, return NULL; } - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); list_add_rcu(&p->list, &tbl->parms.list); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); neigh_parms_data_state_cleanall(p); } @@ -1783,10 +1783,12 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) { if (!parms || parms == &tbl->parms) return; - write_lock_bh(&tbl->lock); + + spin_lock_bh(&tbl->lock); list_del_rcu(&parms->list); parms->dead = 1; - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); + netdev_put(parms->dev, &parms->dev_tracker); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } @@ -1835,7 +1837,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) else WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); - rwlock_init(&tbl->lock); + spin_lock_init(&tbl->lock); mutex_init(&tbl->phash_lock); 
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); @@ -1978,10 +1980,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, err = __neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, NETLINK_CB(skb).portid, extack); - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); neigh_release(neigh); neigh_remove_one(neigh); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); out: return err; @@ -2406,7 +2408,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, * We acquire tbl->lock to be nice to the periodic timers and * make sure they always see a consistent set of values. */ - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); if (tb[NDTA_PARMS]) { struct nlattr *tbp[NDTPA_MAX+1]; @@ -2525,7 +2527,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; errout_tbl_lock: - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); rcu_read_unlock(); errout: return err; @@ -3125,14 +3127,14 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void rcu_read_lock(); nht = rcu_dereference(tbl->nht); - read_lock_bh(&tbl->lock); /* avoid resizes */ + spin_lock_bh(&tbl->lock); /* avoid resizes */ for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; neigh_for_each_in_bucket(n, &nht->hash_heads[chain]) cb(n, cookie); } - read_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); rcu_read_unlock(); } EXPORT_SYMBOL(neigh_for_each); @@ -3402,7 +3404,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl rcu_read_lock(); state->nht = rcu_dereference(tbl->nht); - read_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); return *pos ? 
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } @@ -3442,7 +3444,7 @@ void neigh_seq_stop(struct seq_file *seq, void *v) struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; - read_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); rcu_read_unlock(); } EXPORT_SYMBOL(neigh_seq_stop); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 833f2cf97178e..f3bfecf8a2341 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1217,10 +1217,10 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); - write_lock_bh(&tbl->lock); + spin_lock_bh(&tbl->lock); neigh_release(neigh); neigh_remove_one(neigh); - write_unlock_bh(&tbl->lock); + spin_unlock_bh(&tbl->lock); } return err; From 13cb6ac5b506d510d5d57128ff9d99541aae886e Mon Sep 17 00:00:00 2001 From: Alessandro Zanni Date: Thu, 23 Oct 2025 22:53:52 +0200 Subject: [PATCH 234/867] selftest: net: prevent use of uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix to avoid the usage of the `ret` variable uninitialized in the following macro expansions. 
It solves the following warning: In file included from netlink-dumps.c:21: netlink-dumps.c: In function ‘dump_extack’: ../kselftest_harness.h:788:35: warning: ‘ret’ may be used uninitialized [-Wmaybe-uninitialized] 788 | intmax_t __exp_print = (intmax_t)__exp; \ | ^~~~~~~~~~~ ../kselftest_harness.h:631:9: note: in expansion of macro ‘__EXPECT’ 631 | __EXPECT(expected, #expected, seen, #seen, ==, 0) | ^~~~~~~~ netlink-dumps.c:169:9: note: in expansion of macro ‘EXPECT_EQ’ 169 | EXPECT_EQ(ret, FOUND_EXTACK); | ^~~~~~~~~ The issue can be reproduced, building the tests, with the command: make -C tools/testing/selftests TARGETS=net Signed-off-by: Alessandro Zanni Link: https://patch.msgid.link/20251023205354.28249-1-alessandro.zanni87@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/netlink-dumps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 7618ebe528a4c..679b6c77ace7c 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -143,6 +143,7 @@ TEST(dump_extack) EXPECT_EQ(n, -1); EXPECT_EQ(errno, ENOBUFS); + ret = NO_CTRL; for (i = 0; i < cnt; i++) { struct ext_ack ea = {}; From d0d2203b9ab71b69c22c8ee1d60c51f9561426a3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 23 Oct 2025 03:30:51 +0200 Subject: [PATCH 235/867] strparser: fix typo in comment The name frags_list doesn't appear in the kernel. It should be frag_list as in the next sentence. 
Signed-off-by: Julia Lawall Link: https://patch.msgid.link/20251023013051.1728388-1-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- net/strparser/strparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 43b1f558b33db..b929c1cd85e0d 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -127,7 +127,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, } if (!strp->skb_nextp) { - /* We are going to append to the frags_list of head. + /* We are going to append to the frag_list of head. * Need to unshare the frag_list. */ err = skb_unclone(head, GFP_ATOMIC); From f0773d0b41b492c2ffef92ae74edf65b1d84367e Mon Sep 17 00:00:00 2001 From: Dust Li Date: Thu, 23 Oct 2025 10:00:12 +0800 Subject: [PATCH 236/867] smc: rename smc_find_ism_store_rc to reflect broader usage The function smc_find_ism_store_rc() is used to record the reason why a suitable device (either ISM or RDMA) could not be found. However, its name suggests it is ISM-specific, which is misleading. Rename it to better reflect its actual usage. No functional changes. 
Signed-off-by: Dust Li Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251023020012.69609-1-dust.li@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 77b99e8ef35a4..e9d0e62e0b1bc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2140,7 +2140,7 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, } } -static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini) +static void smc_init_info_store_rc(u32 rc, struct smc_init_info *ini) { if (!ini->rc) ini->rc = rc; @@ -2203,7 +2203,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, mutex_unlock(&smcd_dev_list.mutex); if (!ini->ism_dev[0]) { - smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); + smc_init_info_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); goto not_found; } @@ -2220,7 +2220,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, ini->ism_selected = i; rc = smc_listen_ism_init(new_smc, ini); if (rc) { - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); /* try next active ISM device */ continue; } @@ -2260,7 +2260,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, return; /* V1 ISM device found */ not_found: - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); ini->smcd_version &= ~SMC_V1; ini->ism_dev[0] = NULL; ini->is_smcd = false; @@ -2311,7 +2311,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce); rc = smc_find_rdma_device(new_smc, ini); if (rc) { - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); goto not_found; } if (!ini->smcrv2.uses_gateway) @@ -2328,7 +2328,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, if (!rc) return; ini->smcr_version = smcr_version; - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, 
ini); not_found: ini->smcr_version &= ~SMC_V2; @@ -2375,7 +2375,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc, /* check for matching IP prefix and subnet length (V1) */ prfx_rc = smc_listen_prfx_check(new_smc, pclc); if (prfx_rc) - smc_find_ism_store_rc(prfx_rc, ini); + smc_init_info_store_rc(prfx_rc, ini); /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) @@ -2402,7 +2402,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc, int rc; rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); return (!rc) ? 0 : ini->rc; } return prfx_rc; From 330ce8ffc1848cbfa3e06c2c22750cfffa115579 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:16:30 +0100 Subject: [PATCH 237/867] net: phy: add phy_can_wakeup() Add phy_can_wakeup() to report whether the PHY driver has marked the PHY device as being wake-up capable as far as the driver model is concerned. Reviewed-by: Maxime Chevallier Reviewed-by: Florian Fainelli Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrQs-0000000BLzI-0w3U@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index 3c7634482356e..3eeeaec528320 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1379,6 +1379,18 @@ static inline void phy_disable_eee_mode(struct phy_device *phydev, u32 link_mode linkmode_clear_bit(link_mode, phydev->advertising_eee); } +/** + * phy_can_wakeup() - indicate whether PHY has driver model wakeup capabilities + * @phydev: The phy_device struct + * + * Returns: true/false depending on the PHY driver's device_set_wakeup_capable() + * setting. 
+ */ +static inline bool phy_can_wakeup(struct phy_device *phydev) +{ + return device_can_wakeup(&phydev->mdio.dev); +} + void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); From b344bfacf1de2dd776a218ce8341b9c672745a01 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:16:35 +0100 Subject: [PATCH 238/867] net: phy: add phy_may_wakeup() Add phy_may_wakeup() which uses the driver model's device_may_wakeup() when the PHY driver has marked the device as wakeup capable in the driver model, otherwise use phy_drv_wol_enabled(). Replace the sites that used to call phy_drv_wol_enabled() with this as checking the driver model will be more efficient than checking the WoL state. Export phy_may_wakeup() so that phylink can use it. Reviewed-by: Maxime Chevallier Reviewed-by: Florian Fainelli Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrQx-0000000BLzO-1RLt@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 14 ++++++++++++-- include/linux/phy.h | 9 +++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7a67c900e79a5..b7feaf0cb1df1 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -251,6 +251,16 @@ static bool phy_drv_wol_enabled(struct phy_device *phydev) return wol.wolopts != 0; } +bool phy_may_wakeup(struct phy_device *phydev) +{ + /* If the PHY is using driver-model based wakeup, use that state. 
*/ + if (phy_can_wakeup(phydev)) + return device_may_wakeup(&phydev->mdio.dev); + + return phy_drv_wol_enabled(phydev); +} +EXPORT_SYMBOL_GPL(phy_may_wakeup); + static void phy_link_change(struct phy_device *phydev, bool up) { struct net_device *netdev = phydev->attached_dev; @@ -302,7 +312,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) /* If the PHY on the mido bus is not attached but has WOL enabled * we cannot suspend the PHY. */ - if (!netdev && phy_drv_wol_enabled(phydev)) + if (!netdev && phy_may_wakeup(phydev)) return false; /* PHY not attached? May suspend if the PHY has not already been @@ -1909,7 +1919,7 @@ int phy_suspend(struct phy_device *phydev) if (phydev->suspended || !phydrv) return 0; - phydev->wol_enabled = phy_drv_wol_enabled(phydev) || + phydev->wol_enabled = phy_may_wakeup(phydev) || (netdev && netdev->ethtool->wol_enabled); /* If the device has WOL enabled, we cannot suspend the PHY */ if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) diff --git a/include/linux/phy.h b/include/linux/phy.h index 3eeeaec528320..17a2cdc9f1a0f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1391,6 +1391,15 @@ static inline bool phy_can_wakeup(struct phy_device *phydev) return device_can_wakeup(&phydev->mdio.dev); } +/** + * phy_may_wakeup() - indicate whether PHY has wakeup enabled + * @phydev: The phy_device struct + * + * Returns: true/false depending on the PHY driver's device_set_wakeup_enabled() + * setting if using the driver model, otherwise the legacy determination. 
+ */ +bool phy_may_wakeup(struct phy_device *phydev); + void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); From b79fbd86c84918790c128e6899b420de4667018e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:16:40 +0100 Subject: [PATCH 239/867] net: phylink: add phylink managed MAC Wake-on-Lan support Add core phylink managed Wake-on-Lan support, which is enabled when the MAC driver fills in the new .mac_wol_set() method that this commit creates. When this feature is disabled, phylink acts as it has in the past, merely passing the ethtool WoL calls to phylib whenever a PHY exists. No other new functionality provided by this commit is enabled. When this feature is enabled, a more intelligent approach is used. Phylink will first pass WoL options to the PHY, read them back, and attempt to set any options that were not set at the PHY at the MAC. Since we have PHY drivers that report they support WoL, and accept WoL configuration even though they aren't wired up to be capable of waking the system, we need a way to differentiate between PHYs that think they support WoL and those which actually do. As PHY drivers do not make use of the driver model's wake-up infrastructure, but could, we use this to determine whether PHY drivers can participate. This gives a path forward where, as MAC drivers are converted to this, it encourages PHY drivers to also be converted. Phylink will also ignore the mac_wol argument to phylink_suspend() as it now knows the WoL state at the MAC. MAC drivers are expected to record/configure the Wake-on-Lan state in their .mac_wol_set() method, and deal appropriately with it in their suspend/resume methods. The driver model provides assistance to set the IRQ wake support which may assist driver authors in achieving the necessary configuration. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrR2-0000000BLzU-1xYL@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 80 +++++++++++++++++++++++++++++++++++++-- include/linux/phylink.h | 26 +++++++++++++ 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 9d7799ea1c173..bec44ebdf80ba 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -93,6 +93,9 @@ struct phylink { u8 sfp_port; struct eee_config eee_cfg; + + u32 wolopts_mac; + u8 wol_sopass[SOPASS_MAX]; }; #define phylink_printk(level, pl, fmt, ...) \ @@ -2562,6 +2565,17 @@ void phylink_rx_clk_stop_unblock(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_rx_clk_stop_unblock); +static bool phylink_mac_supports_wol(struct phylink *pl) +{ + return !!pl->mac_ops->mac_wol_set; +} + +static bool phylink_phy_supports_wol(struct phylink *pl, + struct phy_device *phydev) +{ + return phydev && (pl->config->wol_phy_legacy || phy_can_wakeup(phydev)); +} + /** * phylink_suspend() - handle a network device suspend event * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -2575,11 +2589,17 @@ EXPORT_SYMBOL_GPL(phylink_rx_clk_stop_unblock); * can also bring down the link between the MAC and PHY. * - If Wake-on-Lan is active, but being handled by the MAC, the MAC * still needs to receive packets, so we can not bring the link down. + * + * Note: when phylink managed Wake-on-Lan is in use, @mac_wol is ignored. + * (struct phylink_mac_ops.mac_set_wol populated.) 
*/ void phylink_suspend(struct phylink *pl, bool mac_wol) { ASSERT_RTNL(); + if (phylink_mac_supports_wol(pl)) + mac_wol = !!pl->wolopts_mac; + if (mac_wol && (!pl->netdev || pl->netdev->ethtool->wol_enabled)) { /* Wake-on-Lan enabled, MAC handling */ mutex_lock(&pl->state_mutex); @@ -2689,8 +2709,24 @@ void phylink_ethtool_get_wol(struct phylink *pl, struct ethtool_wolinfo *wol) wol->supported = 0; wol->wolopts = 0; - if (pl->phydev) - phy_ethtool_get_wol(pl->phydev, wol); + if (phylink_mac_supports_wol(pl)) { + if (phylink_phy_supports_wol(pl, pl->phydev)) + phy_ethtool_get_wol(pl->phydev, wol); + + /* Where the MAC augments the WoL support, merge its support and + * current configuration. + */ + if (~wol->wolopts & pl->wolopts_mac & WAKE_MAGICSECURE) + memcpy(wol->sopass, pl->wol_sopass, + sizeof(wol->sopass)); + + wol->supported |= pl->config->wol_mac_support; + wol->wolopts |= pl->wolopts_mac; + } else { + /* Legacy */ + if (pl->phydev) + phy_ethtool_get_wol(pl->phydev, wol); + } } EXPORT_SYMBOL_GPL(phylink_ethtool_get_wol); @@ -2707,12 +2743,48 @@ EXPORT_SYMBOL_GPL(phylink_ethtool_get_wol); */ int phylink_ethtool_set_wol(struct phylink *pl, struct ethtool_wolinfo *wol) { + struct ethtool_wolinfo w = { .cmd = ETHTOOL_GWOL }; int ret = -EOPNOTSUPP; + bool changed; + u32 wolopts; ASSERT_RTNL(); - if (pl->phydev) - ret = phy_ethtool_set_wol(pl->phydev, wol); + if (phylink_mac_supports_wol(pl)) { + wolopts = wol->wolopts; + + if (phylink_phy_supports_wol(pl, pl->phydev)) { + ret = phy_ethtool_set_wol(pl->phydev, wol); + if (ret != 0 && ret != -EOPNOTSUPP) + return ret; + + phy_ethtool_get_wol(pl->phydev, &w); + + /* Any Wake-on-Lan modes which the PHY is handling + * should not be passed on to the MAC. 
+ */ + wolopts &= ~w.wolopts; + } + + wolopts &= pl->config->wol_mac_support; + changed = pl->wolopts_mac != wolopts; + if (wolopts & WAKE_MAGICSECURE) + changed |= !!memcmp(wol->sopass, pl->wol_sopass, + sizeof(wol->sopass)); + memcpy(pl->wol_sopass, wol->sopass, sizeof(pl->wol_sopass)); + + if (changed) { + ret = pl->mac_ops->mac_wol_set(pl->config, wolopts, + wol->sopass); + if (!ret) + pl->wolopts_mac = wolopts; + } else { + ret = 0; + } + } else { + if (pl->phydev) + ret = phy_ethtool_set_wol(pl->phydev, wol); + } return ret; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 9af0411761d75..59cb58b29d1d1 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -156,6 +156,8 @@ enum phylink_op_type { * @lpi_capabilities: MAC speeds which can support LPI signalling * @lpi_timer_default: Default EEE LPI timer setting. * @eee_enabled_default: If set, EEE will be enabled by phylink at creation time + * @wol_phy_legacy: Use Wake-on-Lan with PHY even if phy_can_wakeup() is false + * @wol_mac_support: Bitmask of MAC supported %WAKE_* options */ struct phylink_config { struct device *dev; @@ -173,6 +175,10 @@ struct phylink_config { unsigned long lpi_capabilities; u32 lpi_timer_default; bool eee_enabled_default; + + /* Wake-on-Lan support */ + bool wol_phy_legacy; + u32 wol_mac_support; }; void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); @@ -188,6 +194,7 @@ void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); * @mac_link_up: allow the link to come up. * @mac_disable_tx_lpi: disable LPI. * @mac_enable_tx_lpi: enable and configure LPI. + * @mac_wol_set: configure Wake-on-Lan settings at the MAC. * * The individual methods are described more fully below. 
*/ @@ -211,6 +218,9 @@ struct phylink_mac_ops { void (*mac_disable_tx_lpi)(struct phylink_config *config); int (*mac_enable_tx_lpi)(struct phylink_config *config, u32 timer, bool tx_clk_stop); + + int (*mac_wol_set)(struct phylink_config *config, u32 wolopts, + const u8 *sopass); }; #if 0 /* For kernel-doc purposes only. */ @@ -440,6 +450,22 @@ void mac_disable_tx_lpi(struct phylink_config *config); */ int mac_enable_tx_lpi(struct phylink_config *config, u32 timer, bool tx_clk_stop); + +/** + * mac_wol_set() - configure the Wake-on-Lan parameters + * @config: a pointer to a &struct phylink_config. + * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes. + * @sopass: SecureOn(tm) password; meaningful only for %WAKE_MAGICSECURE + * + * Enable the specified Wake-on-Lan options at the MAC. Options that the + * PHY can handle will have been removed from @wolopts. + * + * The presence of this method enables phylink-managed WoL support. + * + * Returns: 0 on success. + */ +int (*mac_wol_set)(struct phylink_config *config, u32 wolopts, + const u8 *sopass); #endif struct phylink_pcs_ops; From dc1a2a9ce5b2c80e02115ff6fb29b726ad9d7777 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:16:45 +0100 Subject: [PATCH 240/867] net: phylink: add phylink managed wake-on-lan PHY speed control Some drivers, e.g. stmmac, use the speed_up()/speed_down() APIs to gain additional power saving during Wake-on-LAN where the PHY is managing the state. Add support to phylink for this, which can be enabled by the MAC driver. Only change the PHY speed if the PHY is configured for wake-up, but without any wake-up on the MAC side, as MAC side means changing the configuration once the negotiation has completed. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrR7-0000000BLza-2PjK@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 12 ++++++++++++ include/linux/phylink.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index bec44ebdf80ba..6e1243bf68aab 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2576,6 +2576,12 @@ static bool phylink_phy_supports_wol(struct phylink *pl, return phydev && (pl->config->wol_phy_legacy || phy_can_wakeup(phydev)); } +static bool phylink_phy_pm_speed_ctrl(struct phylink *pl) +{ + return pl->config->wol_phy_speed_ctrl && !pl->wolopts_mac && + pl->phydev && phy_may_wakeup(pl->phydev); +} + /** * phylink_suspend() - handle a network device suspend event * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -2625,6 +2631,9 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) } else { phylink_stop(pl); } + + if (phylink_phy_pm_speed_ctrl(pl)) + phylink_speed_down(pl, false); } EXPORT_SYMBOL_GPL(phylink_suspend); @@ -2664,6 +2673,9 @@ void phylink_resume(struct phylink *pl) { ASSERT_RTNL(); + if (phylink_phy_pm_speed_ctrl(pl)) + phylink_speed_up(pl); + if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) { /* Wake-on-Lan enabled, MAC handling */ diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 59cb58b29d1d1..38363e566ac3d 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -157,6 +157,7 @@ enum phylink_op_type { * @lpi_timer_default: Default EEE LPI timer setting. 
* @eee_enabled_default: If set, EEE will be enabled by phylink at creation time * @wol_phy_legacy: Use Wake-on-Lan with PHY even if phy_can_wakeup() is false + * @wol_phy_speed_ctrl: Use phy speed control on suspend/resume * @wol_mac_support: Bitmask of MAC supported %WAKE_* options */ struct phylink_config { @@ -178,6 +179,7 @@ struct phylink_config { /* Wake-on-Lan support */ bool wol_phy_legacy; + bool wol_phy_speed_ctrl; u32 wol_mac_support; }; From 6911308d7d111a9c367293b52f2dc265819f2b60 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:16:50 +0100 Subject: [PATCH 241/867] net: stmmac: convert to phylink-managed Wake-on-Lan Convert stmmac to use phylink-managed Wake-on-Lan support. To achieve this, we implement the .mac_wol_set() method, which simply configures the driver model's struct device wakeup for stmmac, and sets the priv->wolopts appropriately. When STMMAC_FLAG_USE_PHY_WOL is set, in the stmmac world this means to only use the PHY's WoL support and ignore the MAC's WoL capabilities. To preserve this behaviour, we enable phylink's legacy mode, and avoid telling phylink that the MAC has any WoL support. This achieves the same functionality for this case. When STMMAC_FLAG_USE_PHY_WOL is not set, we provide the MAC's WoL capabilities to phylink, which then allows phylink to choose between the PHY and MAC for WoL depending on their individual capabilities as described in the phylink commit. This only augments the WoL functionality with PHYs that declare to the driver model that they are wake-up capable. Currently, very few PHY drivers support this. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrRC-0000000BLzg-2tA4@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 6 +--- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 34 ++++--------------- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ++++++++++++++-- .../ethernet/stmicro/stmmac/stmmac_platform.c | 4 +-- 4 files changed, 36 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index aaeaf42084f0d..f128d25346a9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -291,6 +291,7 @@ struct stmmac_priv { int hw_cap_support; int synopsys_id; u32 msg_enable; + /* Our MAC Wake-on-Lan options */ int wolopts; int wol_irq; u32 gmii_address_bus_config; @@ -379,11 +380,6 @@ enum stmmac_state { extern const struct dev_pm_ops stmmac_simple_pm_ops; -static inline bool stmmac_wol_enabled_mac(struct stmmac_priv *priv) -{ - return priv->plat->pmt && device_may_wakeup(priv->device); -} - static inline bool stmmac_wol_enabled_phy(struct stmmac_priv *priv) { return !priv->plat->pmt && device_may_wakeup(priv->device); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index df016c4eb7104..08b570bc60c76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -724,41 +724,19 @@ static void stmmac_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct stmmac_priv *priv = netdev_priv(dev); - if (!priv->plat->pmt) - return phylink_ethtool_get_wol(priv->phylink, wol); - - mutex_lock(&priv->lock); - if (device_can_wakeup(priv->device)) { - wol->supported = WAKE_MAGIC | WAKE_UCAST; - if (priv->hw_cap_support && !priv->dma_cap.pmt_magic_frame) - wol->supported &= ~WAKE_MAGIC; - wol->wolopts = priv->wolopts; - } - 
mutex_unlock(&priv->lock); + return phylink_ethtool_get_wol(priv->phylink, wol); } static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct stmmac_priv *priv = netdev_priv(dev); + int ret; - if (!device_can_wakeup(priv->device)) - return -EOPNOTSUPP; - - if (!priv->plat->pmt) { - int ret = phylink_ethtool_set_wol(priv->phylink, wol); - - if (!ret) - device_set_wakeup_enable(priv->device, !!wol->wolopts); - return ret; - } - - device_set_wakeup_enable(priv->device, !!wol->wolopts); + ret = phylink_ethtool_set_wol(priv->phylink, wol); + if (!ret) + device_set_wakeup_enable(priv->device, !!wol->wolopts); - mutex_lock(&priv->lock); - priv->wolopts = wol->wolopts; - mutex_unlock(&priv->lock); - - return 0; + return ret; } static int stmmac_ethtool_op_get_eee(struct net_device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9fa3c221a0c3c..af4eb94f0f4f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1073,6 +1073,20 @@ static int stmmac_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, return 0; } +static int stmmac_mac_wol_set(struct phylink_config *config, u32 wolopts, + const u8 *sopass) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + + device_set_wakeup_enable(priv->device, !!wolopts); + + mutex_lock(&priv->lock); + priv->wolopts = wolopts; + mutex_unlock(&priv->lock); + + return 0; +} + static const struct phylink_mac_ops stmmac_phylink_mac_ops = { .mac_get_caps = stmmac_mac_get_caps, .mac_select_pcs = stmmac_mac_select_pcs, @@ -1082,6 +1096,7 @@ static const struct phylink_mac_ops stmmac_phylink_mac_ops = { .mac_link_up = stmmac_mac_link_up, .mac_disable_tx_lpi = stmmac_mac_disable_tx_lpi, .mac_enable_tx_lpi = stmmac_mac_enable_tx_lpi, + .mac_wol_set = stmmac_mac_wol_set, }; /** @@ -1266,6 +1281,15 @@ static int stmmac_phylink_setup(struct stmmac_priv 
*priv) config->eee_enabled_default = true; } + if (priv->plat->flags & STMMAC_FLAG_USE_PHY_WOL) { + config->wol_phy_legacy = true; + } else { + if (priv->dma_cap.pmt_remote_wake_up) + config->wol_mac_support |= WAKE_UCAST; + if (priv->dma_cap.pmt_magic_frame) + config->wol_mac_support |= WAKE_MAGIC; + } + fwnode = priv->plat->port_node; if (!fwnode) fwnode = dev_fwnode(priv->device); @@ -7760,7 +7784,7 @@ int stmmac_suspend(struct device *dev) priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); /* Enable Power down mode by programming the PMT regs */ - if (stmmac_wol_enabled_mac(priv)) { + if (priv->wolopts) { stmmac_pmt(priv, priv->hw, priv->wolopts); priv->irq_wake = 1; } else { @@ -7774,7 +7798,7 @@ int stmmac_suspend(struct device *dev) if (stmmac_wol_enabled_phy(priv)) phylink_speed_down(priv->phylink, false); - phylink_suspend(priv->phylink, stmmac_wol_enabled_mac(priv)); + phylink_suspend(priv->phylink, !!priv->wolopts); rtnl_unlock(); if (stmmac_fpe_supported(priv)) @@ -7850,7 +7874,7 @@ int stmmac_resume(struct device *dev) * this bit because it can generate problems while resuming * from another devices (e.g. serial console). 
*/ - if (stmmac_wol_enabled_mac(priv)) { + if (priv->wolopts) { mutex_lock(&priv->lock); stmmac_pmt(priv, priv->hw, 0); mutex_unlock(&priv->lock); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index fbb92cc6ab598..6483d52b4c0f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -969,7 +969,7 @@ static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev) if (!netif_running(ndev)) return 0; - if (!stmmac_wol_enabled_mac(priv)) { + if (!priv->wolopts) { /* Disable clock in case of PWM is off */ clk_disable_unprepare(priv->plat->clk_ptp_ref); @@ -990,7 +990,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (!netif_running(ndev)) return 0; - if (!stmmac_wol_enabled_mac(priv)) { + if (!priv->wolopts) { /* enable the clk previously disabled */ ret = pm_runtime_force_resume(dev); if (ret) From d65cb2e27e6e18d036276905c1aa11828aac5b6a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:16:55 +0100 Subject: [PATCH 242/867] net: stmmac: convert to phylink managed WoL PHY speed Convert stmmac to use phylink's management of the PHY speed when Wake-on-Lan is enabled. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrRH-0000000BLzm-3JjF@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 5 ----- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 7 +------ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +-------------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index f128d25346a9b..d5af9344dfb03 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -380,11 +380,6 @@ enum stmmac_state { extern const struct dev_pm_ops stmmac_simple_pm_ops; -static inline bool stmmac_wol_enabled_phy(struct stmmac_priv *priv) -{ - return !priv->plat->pmt && device_may_wakeup(priv->device); -} - int stmmac_mdio_unregister(struct net_device *ndev); int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 08b570bc60c76..b155e71aac51f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -730,13 +730,8 @@ static void stmmac_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct stmmac_priv *priv = netdev_priv(dev); - int ret; - ret = phylink_ethtool_set_wol(priv->phylink, wol); - if (!ret) - device_set_wakeup_enable(priv->device, !!wol->wolopts); - - return ret; + return phylink_ethtool_set_wol(priv->phylink, wol); } static int stmmac_ethtool_op_get_eee(struct net_device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index af4eb94f0f4f0..fd51068801928 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1205,14 +1205,6 @@ static int stmmac_init_phy(struct net_device *dev) phylink_ethtool_set_eee(priv->phylink, &eee); } - if (!priv->plat->pmt) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; - - phylink_ethtool_get_wol(priv->phylink, &wol); - device_set_wakeup_capable(priv->device, !!wol.supported); - device_set_wakeup_enable(priv->device, !!wol.wolopts); - } - return 0; } @@ -1281,6 +1273,7 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) config->eee_enabled_default = true; } + config->wol_phy_speed_ctrl = true; if (priv->plat->flags & STMMAC_FLAG_USE_PHY_WOL) { config->wol_phy_legacy = true; } else { @@ -7795,9 +7788,6 @@ int stmmac_suspend(struct device *dev) mutex_unlock(&priv->lock); rtnl_lock(); - if (stmmac_wol_enabled_phy(priv)) - phylink_speed_down(priv->phylink, false); - phylink_suspend(priv->phylink, !!priv->wolopts); rtnl_unlock(); @@ -7936,9 +7926,6 @@ int stmmac_resume(struct device *dev) * workqueue thread, which will race with initialisation. */ phylink_resume(priv->phylink); - if (stmmac_wol_enabled_phy(priv)) - phylink_speed_up(priv->phylink); - rtnl_unlock(); netif_device_attach(ndev); From 442a8c68f083f267c0f52526f1cd16988837ec0f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:46:20 +0100 Subject: [PATCH 243/867] net: stmmac: add stmmac_mac_irq_modify() Add a function to allow interrupts to be enabled and disabled in a core independent manner. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrtk-0000000BMYm-3CV5@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 5 +++++ .../ethernet/stmicro/stmmac/dwmac1000_core.c | 15 +++++++++++++++ .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 17 +++++++++++++++++ .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 16 ++++++++++++++++ drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 ++++ .../net/ethernet/stmicro/stmmac/stmmac_fpe.c | 3 +++ 7 files changed, 62 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 31254ba525d56..553a8897b005b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -611,6 +611,11 @@ struct mac_device_info { u8 vlan_fail_q; bool hw_vlan_en; bool reverse_sgmii_enable; + + /* This spinlock protects read-modify-write of the interrupt + * mask/enable registers. 
+ */ + spinlock_t irq_ctrl_lock; }; struct stmmac_rx_routing { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 571e483624442..2ca94bfd3f718 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -61,6 +61,20 @@ static void dwmac1000_core_init(struct mac_device_info *hw, #endif } +static void dwmac1000_irq_modify(struct mac_device_info *hw, u32 disable, + u32 enable) +{ + void __iomem *int_mask = hw->pcsr + GMAC_INT_MASK; + unsigned long flags; + u32 value; + + spin_lock_irqsave(&hw->irq_ctrl_lock, flags); + value = readl(int_mask) | disable; + value &= ~enable; + writel(value, int_mask); + spin_unlock_irqrestore(&hw->irq_ctrl_lock, flags); +} + static int dwmac1000_rx_ipc_enable(struct mac_device_info *hw) { void __iomem *ioaddr = hw->pcsr; @@ -445,6 +459,7 @@ static void dwmac1000_set_mac_loopback(void __iomem *ioaddr, bool enable) const struct stmmac_ops dwmac1000_ops = { .pcs_init = dwmac1000_pcs_init, .core_init = dwmac1000_core_init, + .irq_modify = dwmac1000_irq_modify, .set_mac = stmmac_set_mac, .rx_ipc = dwmac1000_rx_ipc_enable, .dump_regs = dwmac1000_dump_regs, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 0b785389b7eff..6269407d70cd2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -57,6 +57,20 @@ static void dwmac4_core_init(struct mac_device_info *hw, init_waitqueue_head(&priv->tstamp_busy_wait); } +static void dwmac4_irq_modify(struct mac_device_info *hw, u32 disable, + u32 enable) +{ + void __iomem *int_mask = hw->pcsr + GMAC_INT_EN; + unsigned long flags; + u32 value; + + spin_lock_irqsave(&hw->irq_ctrl_lock, flags); + value = readl(int_mask) & ~disable; + value |= enable; + writel(value, int_mask); + spin_unlock_irqrestore(&hw->irq_ctrl_lock, flags); +} + 
static void dwmac4_update_caps(struct stmmac_priv *priv) { if (priv->plat->tx_queues_to_use > 1) @@ -885,6 +899,7 @@ static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, const struct stmmac_ops dwmac4_ops = { .pcs_init = dwmac4_pcs_init, .core_init = dwmac4_core_init, + .irq_modify = dwmac4_irq_modify, .update_caps = dwmac4_update_caps, .set_mac = stmmac_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -920,6 +935,7 @@ const struct stmmac_ops dwmac4_ops = { const struct stmmac_ops dwmac410_ops = { .pcs_init = dwmac4_pcs_init, .core_init = dwmac4_core_init, + .irq_modify = dwmac4_irq_modify, .update_caps = dwmac4_update_caps, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -957,6 +973,7 @@ const struct stmmac_ops dwmac410_ops = { const struct stmmac_ops dwmac510_ops = { .pcs_init = dwmac4_pcs_init, .core_init = dwmac4_core_init, + .irq_modify = dwmac4_irq_modify, .update_caps = dwmac4_update_caps, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 0430af27da407..b40b3ea50e253 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -28,6 +28,20 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); } +static void dwxgmac2_irq_modify(struct mac_device_info *hw, u32 disable, + u32 enable) +{ + void __iomem *int_mask = hw->pcsr + XGMAC_INT_EN; + unsigned long flags; + u32 value; + + spin_lock_irqsave(&hw->irq_ctrl_lock, flags); + value = readl(int_mask) & ~disable; + value |= enable; + writel(value, int_mask); + spin_unlock_irqrestore(&hw->irq_ctrl_lock, flags); +} + static void dwxgmac2_update_caps(struct stmmac_priv *priv) { if (!priv->dma_cap.mbps_10_100) @@ -1411,6 +1425,7 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, const struct 
stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, + .irq_modify = dwxgmac2_irq_modify, .update_caps = dwxgmac2_update_caps, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, @@ -1466,6 +1481,7 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode, const struct stmmac_ops dwxlgmac2_ops = { .core_init = dwxgmac2_core_init, + .irq_modify = dwxgmac2_irq_modify, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, .rx_queue_enable = dwxlgmac2_rx_queue_enable, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 00083ce525492..41a7e18412276 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -333,6 +333,8 @@ int stmmac_hwif_init(struct stmmac_priv *priv) if (!mac) return -ENOMEM; + spin_lock_init(&mac->irq_ctrl_lock); + /* Fallback to generic HW */ for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { entry = &stmmac_hw[i]; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 82cfb6bec334a..cb8fc09caf86b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -319,6 +319,8 @@ struct stmmac_ops { void (*core_init)(struct mac_device_info *hw, struct net_device *dev); /* Update MAC capabilities */ void (*update_caps)(struct stmmac_priv *priv); + /* Change the interrupt enable setting. Enable takes precedence. */ + void (*irq_modify)(struct mac_device_info *hw, u32 disable, u32 enable); /* Enable the MAC RX/TX */ void (*set_mac)(void __iomem *ioaddr, bool enable); /* Enable and verify that the IPC module is supported */ @@ -421,6 +423,8 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, core_init, __args) #define stmmac_mac_update_caps(__priv) \ stmmac_do_void_callback(__priv, mac, update_caps, __priv) +#define stmmac_mac_irq_modify(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, mac, irq_modify, (__priv)->hw, __args) #define stmmac_mac_set(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_mac, __args) #define stmmac_rx_ipc(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c index 75b470ee621a3..c54c702243517 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c @@ -70,8 +70,10 @@ static void stmmac_fpe_configure_pmac(struct ethtool_mmsv *mmsv, bool pmac_enabl struct stmmac_priv *priv = container_of(cfg, struct stmmac_priv, fpe_cfg); const struct stmmac_fpe_reg *reg = cfg->reg; void __iomem *ioaddr = priv->ioaddr; + unsigned long flags; u32 value; + spin_lock_irqsave(&priv->hw->irq_ctrl_lock, flags); value = readl(ioaddr + reg->int_en_reg); if (pmac_enable) { @@ -86,6 +88,7 @@ static void stmmac_fpe_configure_pmac(struct ethtool_mmsv *mmsv, bool pmac_enabl } writel(value, ioaddr + reg->int_en_reg); + spin_unlock_irqrestore(&priv->hw->irq_ctrl_lock, flags); } static void stmmac_fpe_send_mpacket(struct ethtool_mmsv *mmsv, From eed68edac508fed36e3726f5b0b828a83efab7f8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 23 Oct 2025 10:46:25 +0100 Subject: [PATCH 244/867] net: stmmac: add support for controlling PCS interrupts Add support to the PCS instance for controlling the PCS interrupts depending on whether the PCS is used. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vBrtp-0000000BMYs-3bhI@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac1000.h | 7 +++--- .../ethernet/stmicro/stmmac/dwmac1000_core.c | 11 ++++------ drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 2 -- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 10 +++------ .../net/ethernet/stmicro/stmmac/stmmac_pcs.c | 22 ++++++++++++++++++- .../net/ethernet/stmicro/stmmac/stmmac_pcs.h | 4 +++- 6 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 8f3002d9de78b..697bba641e050 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -38,11 +38,10 @@ #define GMAC_INT_DISABLE_PCSAN BIT(2) #define GMAC_INT_DISABLE_PMT BIT(3) #define GMAC_INT_DISABLE_TIMESTAMP BIT(9) -#define GMAC_INT_DISABLE_PCS (GMAC_INT_DISABLE_PCSLINK | \ - GMAC_INT_DISABLE_PCSAN) #define GMAC_INT_DEFAULT_MASK (GMAC_INT_DISABLE_RGMII | \ - GMAC_INT_DISABLE_TIMESTAMP | \ - GMAC_INT_DISABLE_PCS) + GMAC_INT_DISABLE_PCSLINK | \ + GMAC_INT_DISABLE_PCSAN | \ + GMAC_INT_DISABLE_TIMESTAMP) /* PMT Control and Status */ #define GMAC_PMT 0x0000002c diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 2ca94bfd3f718..a2ae136d2c0e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -27,7 +27,9 @@ static int dwmac1000_pcs_init(struct stmmac_priv *priv) if (!priv->dma_cap.pcs) return 0; - return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE); + return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE, + GMAC_INT_DISABLE_PCSLINK | + GMAC_INT_DISABLE_PCSAN); } static void dwmac1000_core_init(struct mac_device_info *hw, @@ -48,12 +50,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw, 
writel(value | GMAC_CORE_INIT, ioaddr + GMAC_CONTROL); /* Mask GMAC interrupts */ - value = GMAC_INT_DEFAULT_MASK; - - if (hw->pcs) - value &= ~GMAC_INT_DISABLE_PCS; - - writel(value, ioaddr + GMAC_INT_MASK); + writel(GMAC_INT_DEFAULT_MASK, ioaddr + GMAC_INT_MASK); #ifdef STMMAC_VLAN_TAG_USED /* Tag detection without filtering */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 6dd84b6544cc0..3cb733781e1e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -106,8 +106,6 @@ #define GMAC_INT_LPI_EN BIT(5) #define GMAC_INT_TSIE BIT(12) -#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_PCS_LINK | GMAC_INT_PCS_ANE) - #define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \ GMAC_INT_TSIE) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 6269407d70cd2..a4282fd7c3c73 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -27,7 +27,8 @@ static int dwmac4_pcs_init(struct stmmac_priv *priv) if (!priv->dma_cap.pcs) return 0; - return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE); + return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE, + GMAC_INT_PCS_LINK | GMAC_INT_PCS_ANE); } static void dwmac4_core_init(struct mac_device_info *hw, @@ -46,12 +47,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, writel((clk_rate / 1000000) - 1, ioaddr + GMAC4_MAC_ONEUS_TIC_COUNTER); /* Enable GMAC interrupts */ - value = GMAC_INT_DEFAULT_ENABLE; - - if (hw->pcs) - value |= GMAC_PCS_IRQ_DEFAULT; - - writel(value, ioaddr + GMAC_INT_EN); + writel(GMAC_INT_DEFAULT_ENABLE, ioaddr + GMAC_INT_EN); if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE) init_waitqueue_head(&priv->tstamp_busy_wait); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c index 
50ea51d7a1cc8..e2f531c119868 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c @@ -2,6 +2,22 @@ #include "stmmac.h" #include "stmmac_pcs.h" +static int dwmac_integrated_pcs_enable(struct phylink_pcs *pcs) +{ + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + + stmmac_mac_irq_modify(spcs->priv, 0, spcs->int_mask); + + return 0; +} + +static void dwmac_integrated_pcs_disable(struct phylink_pcs *pcs) +{ + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + + stmmac_mac_irq_modify(spcs->priv, spcs->int_mask, 0); +} + static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state) @@ -23,11 +39,14 @@ static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs, } static const struct phylink_pcs_ops dwmac_integrated_pcs_ops = { + .pcs_enable = dwmac_integrated_pcs_enable, + .pcs_disable = dwmac_integrated_pcs_disable, .pcs_get_state = dwmac_integrated_pcs_get_state, .pcs_config = dwmac_integrated_pcs_config, }; -int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset) +int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset, + u32 int_mask) { struct stmmac_pcs *spcs; @@ -37,6 +56,7 @@ int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset) spcs->priv = priv; spcs->base = priv->ioaddr + offset; + spcs->int_mask = int_mask; spcs->pcs.ops = &dwmac_integrated_pcs_ops; __set_bit(PHY_INTERFACE_MODE_SGMII, spcs->pcs.supported_interfaces); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h index 64397ac8ecab8..cda93894168e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h @@ -52,6 +52,7 @@ struct stmmac_priv; struct stmmac_pcs { struct stmmac_priv *priv; void __iomem *base; + u32 int_mask; struct phylink_pcs pcs; }; @@ -61,7 +62,8 @@ 
phylink_pcs_to_stmmac_pcs(struct phylink_pcs *pcs) return container_of(pcs, struct stmmac_pcs, pcs); } -int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset); +int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset, + u32 int_mask); /** * dwmac_pcs_isr - TBI, RTBI, or SGMII PHY ISR From c09b183dc14e0fda14bb99b31b9637ce2e1d3682 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 23 Oct 2025 12:00:19 +0200 Subject: [PATCH 245/867] net: usb: usbnet: coding style for functions Functions are not to have blanks between names and parameter lists. Remove them. Signed-off-by: Oliver Neukum Link: https://patch.msgid.link/20251023100136.909118-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 49 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index bf01f27285318..62a85dbad31a5 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -189,7 +189,7 @@ static bool usbnet_needs_usb_name_format(struct usbnet *dev, struct net_device * is_local_ether_addr(net->dev_addr)); } -static void intr_complete (struct urb *urb) +static void intr_complete(struct urb *urb) { struct usbnet *dev = urb->context; int status = urb->status; @@ -221,7 +221,7 @@ static void intr_complete (struct urb *urb) "intr resubmit --> %d\n", status); } -static int init_status (struct usbnet *dev, struct usb_interface *intf) +static int init_status(struct usbnet *dev, struct usb_interface *intf) { char *buf = NULL; unsigned pipe = 0; @@ -326,7 +326,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev) * Some link protocols batch packets, so their rx_fixup paths * can return clones as well as just modify the original skb. 
*/ -void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) +void usbnet_skb_return(struct usbnet *dev, struct sk_buff *skb) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->net->tstats); unsigned long flags; @@ -396,7 +396,7 @@ EXPORT_SYMBOL_GPL(usbnet_update_max_qlen); * *-------------------------------------------------------------------------*/ -int usbnet_change_mtu (struct net_device *net, int new_mtu) +int usbnet_change_mtu(struct net_device *net, int new_mtu) { struct usbnet *dev = netdev_priv(net); int ll_mtu = new_mtu + net->hard_header_len; @@ -472,7 +472,7 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb, * NOTE: annoying asymmetry: if it's active, schedule_work() fails, * but tasklet_schedule() doesn't. hope the failure is rare. */ -void usbnet_defer_kevent (struct usbnet *dev, int work) +void usbnet_defer_kevent(struct usbnet *dev, int work) { set_bit (work, &dev->flags); if (!usbnet_going_away(dev)) { @@ -489,9 +489,9 @@ EXPORT_SYMBOL_GPL(usbnet_defer_kevent); /*-------------------------------------------------------------------------*/ -static void rx_complete (struct urb *urb); +static void rx_complete(struct urb *urb); -static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) +static int rx_submit(struct usbnet *dev, struct urb *urb, gfp_t flags) { struct sk_buff *skb; struct skb_data *entry; @@ -597,7 +597,7 @@ static inline int rx_process(struct usbnet *dev, struct sk_buff *skb) /*-------------------------------------------------------------------------*/ -static void rx_complete (struct urb *urb) +static void rx_complete(struct urb *urb) { struct sk_buff *skb = (struct sk_buff *) urb->context; struct skb_data *entry = (struct skb_data *) skb->cb; @@ -728,7 +728,7 @@ EXPORT_SYMBOL_GPL(usbnet_purge_paused_rxq); // unlink pending rx/tx; completion handlers do all other cleanup -static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q) +static int unlink_urbs(struct usbnet *dev, 
struct sk_buff_head *q) { unsigned long flags; struct sk_buff *skb; @@ -823,7 +823,7 @@ static void usbnet_terminate_urbs(struct usbnet *dev) remove_wait_queue(&dev->wait, &wait); } -int usbnet_stop (struct net_device *net) +int usbnet_stop(struct net_device *net) { struct usbnet *dev = netdev_priv(net); const struct driver_info *info = dev->driver_info; @@ -892,7 +892,7 @@ EXPORT_SYMBOL_GPL(usbnet_stop); // precondition: never called in_interrupt -int usbnet_open (struct net_device *net) +int usbnet_open(struct net_device *net) { struct usbnet *dev = netdev_priv(net); int retval; @@ -1048,7 +1048,7 @@ int usbnet_set_link_ksettings_mii(struct net_device *net, } EXPORT_SYMBOL_GPL(usbnet_set_link_ksettings_mii); -u32 usbnet_get_link (struct net_device *net) +u32 usbnet_get_link(struct net_device *net) { struct usbnet *dev = netdev_priv(net); @@ -1076,7 +1076,7 @@ int usbnet_nway_reset(struct net_device *net) } EXPORT_SYMBOL_GPL(usbnet_nway_reset); -void usbnet_get_drvinfo (struct net_device *net, struct ethtool_drvinfo *info) +void usbnet_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { struct usbnet *dev = netdev_priv(net); @@ -1087,7 +1087,7 @@ void usbnet_get_drvinfo (struct net_device *net, struct ethtool_drvinfo *info) } EXPORT_SYMBOL_GPL(usbnet_get_drvinfo); -u32 usbnet_get_msglevel (struct net_device *net) +u32 usbnet_get_msglevel(struct net_device *net) { struct usbnet *dev = netdev_priv(net); @@ -1095,7 +1095,7 @@ u32 usbnet_get_msglevel (struct net_device *net) } EXPORT_SYMBOL_GPL(usbnet_get_msglevel); -void usbnet_set_msglevel (struct net_device *net, u32 level) +void usbnet_set_msglevel(struct net_device *net, u32 level) { struct usbnet *dev = netdev_priv(net); @@ -1166,7 +1166,7 @@ static void __handle_set_rx_mode(struct usbnet *dev) * especially now that control transfers can be queued. 
*/ static void -usbnet_deferred_kevent (struct work_struct *work) +usbnet_deferred_kevent(struct work_struct *work) { struct usbnet *dev = container_of(work, struct usbnet, kevent); @@ -1277,7 +1277,7 @@ usbnet_deferred_kevent (struct work_struct *work) /*-------------------------------------------------------------------------*/ -static void tx_complete (struct urb *urb) +static void tx_complete(struct urb *urb) { struct sk_buff *skb = (struct sk_buff *) urb->context; struct skb_data *entry = (struct skb_data *) skb->cb; @@ -1332,7 +1332,7 @@ static void tx_complete (struct urb *urb) /*-------------------------------------------------------------------------*/ -void usbnet_tx_timeout (struct net_device *net, unsigned int txqueue) +void usbnet_tx_timeout(struct net_device *net, unsigned int txqueue) { struct usbnet *dev = netdev_priv(net); @@ -1382,8 +1382,7 @@ static int build_dma_sg(const struct sk_buff *skb, struct urb *urb) return 1; } -netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, - struct net_device *net) +netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, struct net_device *net) { struct usbnet *dev = netdev_priv(net); unsigned int length; @@ -1561,7 +1560,7 @@ static inline void usb_free_skb(struct sk_buff *skb) // work (work deferred from completions, in_irq) or timer -static void usbnet_bh (struct timer_list *t) +static void usbnet_bh(struct timer_list *t) { struct usbnet *dev = timer_container_of(dev, t, delay); struct sk_buff *skb; @@ -1636,7 +1635,7 @@ static void usbnet_bh_work(struct work_struct *work) // precondition: never called in_interrupt -void usbnet_disconnect (struct usb_interface *intf) +void usbnet_disconnect(struct usb_interface *intf) { struct usbnet *dev; struct usb_device *xdev; @@ -1700,7 +1699,7 @@ static const struct device_type wwan_type = { }; int -usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) +usbnet_probe(struct usb_interface *udev, const struct usb_device_id *prod) { struct usbnet *dev; 
struct net_device *net; @@ -1907,7 +1906,7 @@ EXPORT_SYMBOL_GPL(usbnet_probe); * resume only when the last interface is resumed */ -int usbnet_suspend (struct usb_interface *intf, pm_message_t message) +int usbnet_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1940,7 +1939,7 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message) } EXPORT_SYMBOL_GPL(usbnet_suspend); -int usbnet_resume (struct usb_interface *intf) +int usbnet_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); struct sk_buff *skb; From 9078e6c5f1de342ae0c2322c999bbab9c2ad08b7 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 23 Oct 2025 12:21:10 +0100 Subject: [PATCH 246/867] net: ravb: Make DBAT entry count configurable per-SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid wasting coherent DMA memory by allocating the descriptor base address table sized for the actual number of DBAT/CDARq entries supported by the SoC. Some platforms (for example GBETH) only provide two CDARq entries; previously the driver always allocated space for 22 entries which needlessly consumed memory on those systems. Pass the per-SoC dbat_entry_num via struct ravb_hw_info and use it for allocation and initialization in probe. This sizes the table correctly and removes the unnecessary memory overhead on SoCs with fewer DBAT entries. 
Signed-off-by: Lad Prabhakar Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20251023112111.215198-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 7b48060c250b4..d65cd83ddd163 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1017,7 +1017,6 @@ enum CSR2_BIT { #define CSR2_CSUM_ENABLE (CSR2_RTCP4 | CSR2_RUDP4 | CSR2_RICMP4 | \ CSR2_RTCP6 | CSR2_RUDP6 | CSR2_RICMP6) -#define DBAT_ENTRY_NUM 22 #define RX_QUEUE_OFFSET 4 #define NUM_RX_QUEUE 2 #define NUM_TX_QUEUE 2 @@ -1062,6 +1061,7 @@ struct ravb_hw_info { u32 rx_max_frame_size; u32 rx_buffer_size; u32 rx_desc_size; + u32 dbat_entry_num; unsigned aligned_tx: 1; unsigned coalesce_irqs:1; /* Needs software IRQ coalescing */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index e2d7ce1a85e84..cb5ae9f85252b 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2714,6 +2714,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), .rx_desc_size = sizeof(struct ravb_ex_rx_desc), + .dbat_entry_num = 22, .aligned_tx = 1, .gptp = 1, .nc_queues = 1, @@ -2737,6 +2738,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), .rx_desc_size = sizeof(struct ravb_ex_rx_desc), + .dbat_entry_num = 22, .internal_delay = 1, .tx_counters = 1, .multi_irqs = 1, @@ -2763,6 +2765,7 @@ static const struct ravb_hw_info ravb_gen4_hw_info = { .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), .rx_desc_size = sizeof(struct ravb_ex_rx_desc), + 
.dbat_entry_num = 22, .internal_delay = 1, .tx_counters = 1, .multi_irqs = 1, @@ -2789,6 +2792,7 @@ static const struct ravb_hw_info ravb_rzv2m_hw_info = { .rx_buffer_size = SZ_2K + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), .rx_desc_size = sizeof(struct ravb_ex_rx_desc), + .dbat_entry_num = 22, .multi_irqs = 1, .err_mgmt_irqs = 1, .gptp = 1, @@ -2814,6 +2818,7 @@ static const struct ravb_hw_info gbeth_hw_info = { .rx_max_frame_size = SZ_8K, .rx_buffer_size = SZ_2K, .rx_desc_size = sizeof(struct ravb_rx_desc), + .dbat_entry_num = 2, .aligned_tx = 1, .coalesce_irqs = 1, .tx_counters = 1, @@ -3045,7 +3050,7 @@ static int ravb_probe(struct platform_device *pdev) ravb_parse_delay_mode(np, ndev); /* Allocate descriptor base address table */ - priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM; + priv->desc_bat_size = sizeof(struct ravb_desc) * info->dbat_entry_num; priv->desc_bat = dma_alloc_coherent(ndev->dev.parent, priv->desc_bat_size, &priv->desc_bat_dma, GFP_KERNEL); if (!priv->desc_bat) { @@ -3055,7 +3060,7 @@ static int ravb_probe(struct platform_device *pdev) error = -ENOMEM; goto out_rpm_put; } - for (q = RAVB_BE; q < DBAT_ENTRY_NUM; q++) + for (q = RAVB_BE; q < info->dbat_entry_num; q++) priv->desc_bat[q].die_dt = DT_EOS; /* Initialise HW timestamp list */ From 3912e804ff6a03693cc50d801ab840479f7b20ac Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 23 Oct 2025 12:21:11 +0100 Subject: [PATCH 247/867] net: ravb: Allocate correct number of queues based on SoC support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the per-SoC match data flag `nc_queues` to decide how many TX/RX queues to allocate. If the SoC does not provide a network-control queue, fall back to a single TX/RX queue. Obtain the match data before calling alloc_etherdev_mqs() so the allocation is sized correctly. 
Signed-off-by: Lad Prabhakar Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20251023112111.215198-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index cb5ae9f85252b..c3fc15f9ec852 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2946,13 +2946,14 @@ static int ravb_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(rstc), "failed to get cpg reset\n"); + info = of_device_get_match_data(&pdev->dev); + ndev = alloc_etherdev_mqs(sizeof(struct ravb_private), - NUM_TX_QUEUE, NUM_RX_QUEUE); + info->nc_queues ? NUM_TX_QUEUE : 1, + info->nc_queues ? NUM_RX_QUEUE : 1); if (!ndev) return -ENOMEM; - info = of_device_get_match_data(&pdev->dev); - ndev->features = info->net_features; ndev->hw_features = info->net_hw_features; ndev->vlan_features = info->vlan_features; From 32dd679b88d58e5245727974d1726f499f7f8f3d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 23 Oct 2025 13:12:08 +0200 Subject: [PATCH 248/867] dt-bindings: net: snps,dwmac: move rk3399 line to its correct position Move the rk3399 compatible to its alphabetically correct position. 
Reviewed-by: Andrew Lunn Acked-by: Conor Dooley Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20251023111213.298860-2-heiko@sntech.de Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 658c004e6a5c8..28113ac5e11ac 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -86,9 +86,9 @@ properties: - rockchip,rk3328-gmac - rockchip,rk3366-gmac - rockchip,rk3368-gmac + - rockchip,rk3399-gmac - rockchip,rk3576-gmac - rockchip,rk3588-gmac - - rockchip,rk3399-gmac - rockchip,rv1108-gmac - snps,dwmac - snps,dwmac-3.40a From e774c91dca451bcf6eb4ca05d6bef977f88ceff6 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 23 Oct 2025 13:12:09 +0200 Subject: [PATCH 249/867] dt-bindings: net: snps,dwmac: Sync list of Rockchip compatibles A number of dwmac variants from Rockchip SoCs have turned up in the Rockchip-specific binding, but not in the main list in snps,dwmac.yaml which as the comment indicates is needed for accurate matching. So add the missing rk3528, rk3568 and rv1126 to the main list. 
Reviewed-by: Andrew Lunn Acked-by: Conor Dooley Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20251023111213.298860-3-heiko@sntech.de Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 28113ac5e11ac..1a0d6789a59b7 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -87,9 +87,12 @@ properties: - rockchip,rk3366-gmac - rockchip,rk3368-gmac - rockchip,rk3399-gmac + - rockchip,rk3528-gmac + - rockchip,rk3568-gmac - rockchip,rk3576-gmac - rockchip,rk3588-gmac - rockchip,rv1108-gmac + - rockchip,rv1126-gmac - snps,dwmac - snps,dwmac-3.40a - snps,dwmac-3.50a From 4a667bec74b3c108729ae2db2196f6bd15d62f74 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 23 Oct 2025 13:12:10 +0200 Subject: [PATCH 250/867] dt-bindings: net: rockchip-dwmac: Add compatible string for RK3506 Rockchip RK3506 has two Ethernet controllers based on Synopsys DWC Ethernet QoS IP. Add compatible string for the RK3506 variant. 
Reviewed-by: Andrew Lunn Acked-by: Conor Dooley Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20251023111213.298860-4-heiko@sntech.de Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/rockchip-dwmac.yaml | 3 +++ Documentation/devicetree/bindings/net/snps,dwmac.yaml | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml index 0ac7c4b47d6bf..d17112527dab0 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml @@ -24,6 +24,7 @@ select: - rockchip,rk3366-gmac - rockchip,rk3368-gmac - rockchip,rk3399-gmac + - rockchip,rk3506-gmac - rockchip,rk3528-gmac - rockchip,rk3568-gmac - rockchip,rk3576-gmac @@ -50,6 +51,7 @@ properties: - rockchip,rv1108-gmac - items: - enum: + - rockchip,rk3506-gmac - rockchip,rk3528-gmac - rockchip,rk3568-gmac - rockchip,rk3576-gmac @@ -148,6 +150,7 @@ allOf: compatible: contains: enum: + - rockchip,rk3506-gmac - rockchip,rk3528-gmac then: properties: diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 1a0d6789a59b7..dd3c72e8363e7 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -87,6 +87,7 @@ properties: - rockchip,rk3366-gmac - rockchip,rk3368-gmac - rockchip,rk3399-gmac + - rockchip,rk3506-gmac - rockchip,rk3528-gmac - rockchip,rk3568-gmac - rockchip,rk3576-gmac From 2010163a8ea4a19308ec6cf259fe68f014553efb Mon Sep 17 00:00:00 2001 From: David Wu Date: Thu, 23 Oct 2025 13:12:11 +0200 Subject: [PATCH 251/867] ethernet: stmmac: dwmac-rk: Add RK3506 GMAC support Add the needed glue blocks for the RK3506-specific setup. The RK3506 dwmac only supports up to 100MBit with a RMII PHY, but no RGMII. 
Signed-off-by: David Wu Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20251023111213.298860-5-heiko@sntech.de Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 643578266dfca..a5c7e03ebc63f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -827,6 +827,69 @@ static const struct rk_gmac_ops rk3399_ops = { .set_speed = rk3399_set_speed, }; +#define RK3506_GRF_SOC_CON8 0x0020 +#define RK3506_GRF_SOC_CON11 0x002c + +#define RK3506_GMAC_RMII_MODE GRF_BIT(1) + +#define RK3506_GMAC_CLK_RMII_DIV2 GRF_BIT(3) +#define RK3506_GMAC_CLK_RMII_DIV20 GRF_CLR_BIT(3) + +#define RK3506_GMAC_CLK_SELECT_CRU GRF_CLR_BIT(5) +#define RK3506_GMAC_CLK_SELECT_IO GRF_BIT(5) + +#define RK3506_GMAC_CLK_RMII_GATE GRF_BIT(2) +#define RK3506_GMAC_CLK_RMII_NOGATE GRF_CLR_BIT(2) + +static void rk3506_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + unsigned int id = bsp_priv->id, offset; + + offset = (id == 1) ? RK3506_GRF_SOC_CON11 : RK3506_GRF_SOC_CON8; + regmap_write(bsp_priv->grf, offset, RK3506_GMAC_RMII_MODE); +} + +static const struct rk_reg_speed_data rk3506_reg_speed_data = { + .rmii_10 = RK3506_GMAC_CLK_RMII_DIV20, + .rmii_100 = RK3506_GMAC_CLK_RMII_DIV2, +}; + +static int rk3506_set_speed(struct rk_priv_data *bsp_priv, + phy_interface_t interface, int speed) +{ + unsigned int id = bsp_priv->id, offset; + + offset = (id == 1) ? RK3506_GRF_SOC_CON11 : RK3506_GRF_SOC_CON8; + return rk_set_reg_speed(bsp_priv, &rk3506_reg_speed_data, + offset, interface, speed); +} + +static void rk3506_set_clock_selection(struct rk_priv_data *bsp_priv, + bool input, bool enable) +{ + unsigned int value, offset, id = bsp_priv->id; + + offset = (id == 1) ? RK3506_GRF_SOC_CON11 : RK3506_GRF_SOC_CON8; + + value = input ? 
RK3506_GMAC_CLK_SELECT_IO : + RK3506_GMAC_CLK_SELECT_CRU; + value |= enable ? RK3506_GMAC_CLK_RMII_NOGATE : + RK3506_GMAC_CLK_RMII_GATE; + regmap_write(bsp_priv->grf, offset, value); +} + +static const struct rk_gmac_ops rk3506_ops = { + .set_to_rmii = rk3506_set_to_rmii, + .set_speed = rk3506_set_speed, + .set_clock_selection = rk3506_set_clock_selection, + .regs_valid = true, + .regs = { + 0xff4c8000, /* gmac0 */ + 0xff4d0000, /* gmac1 */ + 0x0, /* sentinel */ + }, +}; + #define RK3528_VO_GRF_GMAC_CON 0x0018 #define RK3528_VO_GRF_MACPHY_CON0 0x001c #define RK3528_VO_GRF_MACPHY_CON1 0x0020 @@ -1809,6 +1872,7 @@ static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops }, { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, + { .compatible = "rockchip,rk3506-gmac", .data = &rk3506_ops }, { .compatible = "rockchip,rk3528-gmac", .data = &rk3528_ops }, { .compatible = "rockchip,rk3568-gmac", .data = &rk3568_ops }, { .compatible = "rockchip,rk3576-gmac", .data = &rk3576_ops }, From 384d8426329531fe1952549266fa3a7444dc6c31 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 23 Oct 2025 13:12:12 +0200 Subject: [PATCH 252/867] MAINTAINERS: add dwmac-rk glue driver to the main Rockchip entry The dwmac-rk glue driver is currently not caught by the general maintainer entry for Rockchip SoCs, so add it explicitly, similar to the i2c driver. The binding document in net/rockchip-dwmac.yaml already gets caught by the wildcard match. 
Signed-off-by: Heiko Stuebner Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251023111213.298860-6-heiko@sntech.de Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3ed59823f7a4d..d652f4f27756e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3296,6 +3296,7 @@ F: drivers/*/*/*rockchip* F: drivers/*/*rockchip* F: drivers/clk/rockchip/ F: drivers/i2c/busses/i2c-rk3x.c +F: drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c F: sound/soc/rockchip/ N: rockchip From 428ea708b714ba09c96da5722fb1c8b86af509d1 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 13 Oct 2025 22:08:33 -0700 Subject: [PATCH 253/867] wifi: rt2x00: check retval for of_get_mac_address of_get_mac_address can return -EPROBE_DEFER when nvmem is not probed yet for whatever reason. In this case, nvmem mac assignments will not work. Based on the function path, this change only has effect for rt2800soc.c and rt2800pci.c. The former tends to use nvmem for assignments. Signed-off-by: Rosen Penev Acked-by: Stanislaw Gruszka Link: https://patch.msgid.link/20251014050833.46377-1-rosenp@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 4 +++- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/ralink/rt2x00/rt2x00dev.c | 10 ++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index b264ed0af9234..f07152fa37255 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -11011,7 +11011,9 @@ static int rt2800_validate_eeprom(struct rt2x00_dev *rt2x00dev) * Start validation of the data that has been read. 
*/ mac = rt2800_eeprom_addr(rt2x00dev, EEPROM_MAC_ADDR_0); - rt2x00lib_set_mac_address(rt2x00dev, mac); + retval = rt2x00lib_set_mac_address(rt2x00dev, mac); + if (retval) + return retval; word = rt2800_eeprom_read(rt2x00dev, EEPROM_NIC_CONF0); if (word == 0xffff) { diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 09b9d1f9f793f..665887e9b118b 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -1427,7 +1427,7 @@ static inline void rt2x00debug_dump_frame(struct rt2x00_dev *rt2x00dev, */ u32 rt2x00lib_get_bssidx(struct rt2x00_dev *rt2x00dev, struct ieee80211_vif *vif); -void rt2x00lib_set_mac_address(struct rt2x00_dev *rt2x00dev, u8 *eeprom_mac_addr); +int rt2x00lib_set_mac_address(struct rt2x00_dev *rt2x00dev, u8 *eeprom_mac_addr); /* * Interrupt context handlers. diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index f8a6f9c968a1e..778a478ab53a9 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -988,14 +988,20 @@ static void rt2x00lib_rate(struct ieee80211_rate *entry, entry->flags |= IEEE80211_RATE_SHORT_PREAMBLE; } -void rt2x00lib_set_mac_address(struct rt2x00_dev *rt2x00dev, u8 *eeprom_mac_addr) +int rt2x00lib_set_mac_address(struct rt2x00_dev *rt2x00dev, u8 *eeprom_mac_addr) { - of_get_mac_address(rt2x00dev->dev->of_node, eeprom_mac_addr); + int ret; + + ret = of_get_mac_address(rt2x00dev->dev->of_node, eeprom_mac_addr); + if (ret == -EPROBE_DEFER) + return ret; if (!is_valid_ether_addr(eeprom_mac_addr)) { eth_random_addr(eeprom_mac_addr); rt2x00_eeprom_dbg(rt2x00dev, "MAC: %pM\n", eeprom_mac_addr); } + + return 0; } EXPORT_SYMBOL_GPL(rt2x00lib_set_mac_address); From a392cde88d19af917740d27e13115447d3b21a06 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Tue, 23 Sep 2025 17:23:22 +0000 Subject: [PATCH 254/867] wifi: 
cfg80211/mac80211: validate radio frequency range for monitor mode In multi-radio devices, it is possible to have an MLD AP and a monitor interface active at the same time. In such cases, monitor mode may not be able to specify a fixed channel and could end up capturing frames from all radios, including those outside the intended frequency bands. This patch adds frequency validation for monitor mode. Received frames are now only processed if their frequency falls within the allowed ranges of the radios specified by the interface's radio_mask. This prevents monitor mode from capturing frames outside the supported radio. Signed-off-by: Ryder Lee Link: https://patch.msgid.link/700b8284e845d96654eb98431f8eeb5a81503862.1758647858.git.ryder.lee@mediatek.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++ net/mac80211/rx.c | 49 ++++++++++++++++++++++++++++++++++++++++++ net/wireless/util.c | 6 +++--- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 781624f5913af..3b6f48a783bb2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1015,6 +1015,7 @@ const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); + /** * nl80211_chan_width_to_mhz - get the channel width in MHz * @chan_width: the channel width from &enum nl80211_chan_width @@ -6882,6 +6883,19 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5; } +/** + * ieee80211_radio_freq_range_valid - Check if the radio supports the + * specified frequency range + * + * @radio: wiphy radio + * @freq: the frequency (in KHz) to be queried + * @width: the bandwidth (in KHz) to be queried + * + * Return: whether or not the given frequency range is valid for the given radio + */ +bool ieee80211_radio_freq_range_valid(const struct wiphy_radio
*radio, + u32 freq, u32 width); + /** * cfg80211_radio_chandef_valid - Check if the radio supports the chandef * diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6af43dfefdd6a..29175a0c9f688 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -763,6 +763,51 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local, return skb; } +static bool +ieee80211_validate_monitor_radio(struct ieee80211_sub_if_data *sdata, + struct ieee80211_local *local, + struct ieee80211_rx_status *status) +{ + struct wiphy *wiphy = local->hw.wiphy; + int i, freq, bw; + + if (!wiphy->n_radio) + return true; + + switch (status->bw) { + case RATE_INFO_BW_20: + bw = 20000; + break; + case RATE_INFO_BW_40: + bw = 40000; + break; + case RATE_INFO_BW_80: + bw = 80000; + break; + case RATE_INFO_BW_160: + bw = 160000; + break; + case RATE_INFO_BW_320: + bw = 320000; + break; + default: + return false; + } + + freq = MHZ_TO_KHZ(status->freq); + + for (i = 0; i < wiphy->n_radio; i++) { + if (!(sdata->wdev.radio_mask & BIT(i))) + continue; + + if (!ieee80211_radio_freq_range_valid(&wiphy->radio[i], freq, bw)) + continue; + + return true; + } + return false; +} + /* * This function copies a received frame to all monitor interfaces and * returns a cleaned-up SKB that no longer includes the FCS nor the @@ -855,6 +900,10 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, chandef->chan->center_freq != status->freq) continue; + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && + !ieee80211_validate_monitor_radio(sdata, local, status)) + continue; + if (!prev_sdata) { prev_sdata = sdata; continue; diff --git a/net/wireless/util.c b/net/wireless/util.c index 56724b33af045..97f40c6d1e9d1 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2942,9 +2942,8 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type) } EXPORT_SYMBOL(cfg80211_get_iftype_ext_capa); -static bool -ieee80211_radio_freq_range_valid(const struct wiphy_radio 
*radio, - u32 freq, u32 width) +bool ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio, + u32 freq, u32 width) { const struct wiphy_radio_freq_range *r; int i; @@ -2958,6 +2957,7 @@ ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio, return false; } +EXPORT_SYMBOL(ieee80211_radio_freq_range_valid); bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, const struct cfg80211_chan_def *chandef) From a3b16dfe79eecafea0e058b038bb506ed9bd2c89 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 17 Oct 2025 09:32:41 +0530 Subject: [PATCH 255/867] wifi: mac80211_hwsim: advertise puncturing feature support If userspace provides a puncturing bitmap via the NL80211_ATTR_PUNCT_BITMAP attribute, the kernel with mac80211_hwsim driver currently rejects the command with the error: "driver doesn't support puncturing", because the driver does not advertise support for this feature. At present, the following hwsim test cases utilize puncturing, but the bitmap is not sent to the kernel. Instead, the puncturing information is conveyed only through the beacon data: * eht_5ghz_80mhz_puncturing_override_1 * eht_5ghz_80mhz_puncturing_override_2 * eht_5ghz_80mhz_puncturing_override_3 A future change in hostapd will begin configuring the puncturing bitmap explicitly, which will cause these test cases to fail unless the driver advertises support. To address this, update mac80211_hwsim driver to advertise puncturing feature support. 
Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20251017-hwsim_set_punct_feature_bit-v1-1-3be1bb3450c0@oss.qualcomm.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 9f856042a67af..cd84dfd5b47e7 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -5793,6 +5793,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, ieee80211_hw_set(hw, NO_AUTO_VIF); wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); + wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_PUNCT); for (i = 0; i < ARRAY_SIZE(data->link_data); i++) { hrtimer_setup(&data->link_data[i].beacon_timer, mac80211_hwsim_beacon, From 8f24be708829854560e1db9f765c51305b046183 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:50:49 +0300 Subject: [PATCH 256/867] wifi: mac80211: reset CRC valid after CSA While waiting for a beacon after CSA, reset the CRC valid so that the next beacon is handled even if it happens to be identical to the last one on the old channel. This is an AP bug either way, but it's better to disconnect cleanly than to have lingering CSA state. In the iwlwifi instantiation of this problem, mac80211 is ignoring the beacon but the firmware creates a new CSA, and then crashes later because mac80211/driver didn't do anything about it.
Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019115024.521ad9c6b87d.I86376900df3d3423185b75bf63358c29f33a5eb6@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b5827ea438ee..e699702fe5b13 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2508,6 +2508,16 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy, link->u.mgd.csa.waiting_bcn = true; + /* + * The next beacon really should always be different, so this should + * have no effect whatsoever. However, some APs (we observed this in + * an Asus AXE11000), the beacon after the CSA might be identical to + * the last beacon on the old channel - in this case we'd ignore it. + * Resetting the CRC will lead us to handle it better (albeit with a + * disconnect, but clearly the AP is broken.) + */ + link->u.mgd.beacon_crc_valid = false; + /* apply new TPE restrictions immediately on the new channel */ if (link->u.mgd.csa.ap_chandef.chan->band == NL80211_BAND_6GHZ && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) { From ad55aa3ad8f843b2600db322d312f9f28d79568e Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 21 Oct 2025 17:10:51 +1100 Subject: [PATCH 257/867] wifi: mac80211: get probe response chan via ieee80211_get_channel_khz Make use of ieee80211_get_channel_khz() rather than the MHz counterpart to ensure probe responses received on an S1G channel pass the check.
Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20251021061051.235258-1-lachlan.hodges@morsemicro.com [modify indentation] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e699702fe5b13..025210d504051 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6620,8 +6620,8 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_link_data *link, * Response frame shall be set to the broadcast address [..]" * So, on 6GHz band we should also accept broadcast responses. */ - channel = ieee80211_get_channel(sdata->local->hw.wiphy, - rx_status->freq); + channel = ieee80211_get_channel_khz(sdata->local->hw.wiphy, + ieee80211_rx_status_to_khz(rx_status)); if (!channel) return; From bca76b875d0530658f3ba1bc946dbae1974f14c3 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 21 Oct 2025 17:12:01 +1100 Subject: [PATCH 258/867] wifi: cfg80211: default S1G chandef width to 1MHz When management frames are passed down to be transmitted by usermode, often times the NL80211_ATTR_CHANNEL_WIDTH is not used as it's implied to be transmitted on the control width. This can lead to errors during chandef validation as the offsets from the channel center are wrong. Ensure we initialise S1G chandefs to a width of 1MHz rather than 20MHz.
Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20251021061201.235754-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 346dfd2bd9879..ceca47cd9e251 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3544,6 +3544,9 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, return -EINVAL; } + if (cfg80211_chandef_is_s1g(chandef)) + chandef->width = NL80211_CHAN_WIDTH_1; + if (attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { enum nl80211_channel_type chantype; From cc18fffa3a51792637169872886df3407bd5bb84 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Fri, 24 Oct 2025 10:06:27 +0530 Subject: [PATCH 259/867] wifi: mac80211: fix missing RX bitrate update for mesh forwarding path Currently, RX bitrate statistics are not updated for packets received on the mesh forwarding path during fast RX processing. This results in incomplete RX rate tracking in station dump outputs for mesh scenarios. Update ieee80211_invoke_fast_rx() to record the RX rate using sta_stats_encode_rate() and store it in the last_rate field of ieee80211_sta_rx_stats when RX_QUEUED is returned from ieee80211_rx_mesh_data(). This ensures that RX bitrate is properly accounted for in both RSS and non-RSS paths. Signed-off-by: Sarika Sharma Link: https://patch.msgid.link/20251024043627.1640447-1-sarika.sharma@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 29175a0c9f688..4641a2a80856a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4952,6 +4952,11 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, /* after this point, don't punt to the slowpath! 
*/ + if (fast_rx->uses_rss) + stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats); + else + stats = &rx->link_sta->rx_stats; + if (rx->key && !(status->flag & RX_FLAG_MIC_STRIPPED) && pskb_trim(skb, skb->len - fast_rx->icv_len)) goto drop; @@ -4986,6 +4991,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); switch (res) { case RX_QUEUED: + stats->last_rx = jiffies; + stats->last_rate = sta_stats_encode_rate(status); return true; case RX_CONTINUE: break; @@ -4999,11 +5006,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, drop: dev_kfree_skb(skb); - if (fast_rx->uses_rss) - stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats); - else - stats = &rx->link_sta->rx_stats; - stats->dropped++; return true; } From 7cc986c04a9b07d91684f7e326fa5b960215bc97 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Fri, 24 Oct 2025 10:16:48 +0530 Subject: [PATCH 260/867] wifi: cfg80211: Add debugfs support for multi-radio wiphy In multi-radio wiphy architecture, where a single wiphy can have multiple radios tied to it, radio specific configuration parameters and global wiphy parameters are maintained for the entire physical device and common to all radios. But, each radio in a wiphy can have different values for each radio configuration parameter, like RTS threshold. With the current debugfs directory structure, the values of global wiphy configuration parameters can be viewed, but, values of individual radio configuration parameters cannot be viewed, as radio specific configuration parameters are not maintained, separately. To address this, in addition to maintaining global wiphy configuration parameters common to all radios, create separate debugfs directories for each radio in a wiphy to maintain parameters corresponding to that radio in this directory. In implementation, maintain a dentry structure in wiphy_radio_cfg, a structure containing radio configurations of a wiphy. 
This struct is maintained to denote per-radio configurations of a wiphy. Create separate directories representing each radio within phy#X directory in debugfs during wiphy registration. Sample directory structure with this change: ls /sys/kernel/debug/ieee80211/phy0/radio radio0/ radio1/ radio2/ Signed-off-by: Roopni Devanathan Link: https://patch.msgid.link/20251024044649.483557-2-quic_rdevanat@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ net/wireless/core.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3b6f48a783bb2..53490eb04e87b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5684,9 +5684,13 @@ struct wiphy_iftype_akm_suites { * * @rts_threshold: RTS threshold (dot11RTSThreshold); * -1 (default) = RTS/CTS disabled + * @radio_debugfsdir: Pointer to debugfs directory containing the radio- + * specific parameters. + * NULL (default) = Debugfs directory not created */ struct wiphy_radio_cfg { u32 rts_threshold; + struct dentry *radio_debugfsdir; }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 797f9f2004a69..f3568eb5e5922 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,6 +34,9 @@ /* name for sysfs, %d is appended */ #define PHY_NAME "phy" +/* maximum length of radio debugfs directory name */ +#define RADIO_DEBUGFSDIR_MAX_LEN 8 + MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); @@ -1042,6 +1045,18 @@ int wiphy_register(struct wiphy *wiphy) /* add to debugfs */ rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy), ieee80211_debugfs_dir); + if (wiphy->n_radio > 0) { + int idx; + char radio_name[RADIO_DEBUGFSDIR_MAX_LEN]; + + for (idx = 0; idx < wiphy->n_radio; idx++) { + scnprintf(radio_name, sizeof(radio_name), "radio%d", + idx); + wiphy->radio_cfg[idx].radio_debugfsdir = + debugfs_create_dir(radio_name, + 
rdev->wiphy.debugfsdir); + } + } cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); From 88de08348af8ce15dc563e0ebb5553eddd821c06 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Fri, 24 Oct 2025 10:16:49 +0530 Subject: [PATCH 261/867] wifi: cfg80211: Add parameters to radio-specific debugfs directories In multi-radio wiphy architecture, where a single wiphy can have multiple radios tied to it, radio specific configuration parameters and global wiphy parameters are maintained for the entire physical device and common to all radios. But, each radio in a wiphy can have different values for each radio configuration parameter like RTS threshold. With the current debugfs directory structure, the values of global wiphy configuration parameters can be viewed, but, values of individual radio configuration parameters cannot be viewed. To address this requirement, maintain separate entries of each radio configuration parameter i.e., RTS threshold in corresponding radio- specific debugfs directory. This way, radio-specific configuration parameters can be maintained along with global wiphy configuration parameters. Whenever the values are changed for one radio, the values for rest of the radios in the wiphy and the global wiphy parameter value will remain intact. 
Sample output: /# iw phy#0 set rts 100 radio 1 /# iw phy#0 set rts 468 radio 0 /# cat /sys/kernel/debug/ieee80211/phy0/rts_threshold -1 /# cat /sys/kernel/debug/ieee80211/phy0/radio0/radio_rts_threshold 468 /# cat /sys/kernel/debug/ieee80211/phy0/radio1/radio_rts_threshold 100 /# iw phy#0 set rts 500 /# cat /sys/kernel/debug/ieee80211/phy0/rts_threshold 500 /# cat /sys/kernel/debug/ieee80211/phy0/radio0/radio_rts_threshold 500 /# cat /sys/kernel/debug/ieee80211/phy0/radio1/radio_rts_threshold 500 Signed-off-by: Roopni Devanathan Link: https://patch.msgid.link/20251024044649.483557-3-quic_rdevanat@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/debugfs.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 40e49074e2eeb..f9e7fff1ef257 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -29,6 +29,24 @@ static const struct file_operations name## _ops = { \ .llseek = generic_file_llseek, \ } +#define DEBUGFS_RADIO_READONLY_FILE(name, buflen, fmt, value...) 
\ +static ssize_t name## _read(struct file *file, char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + struct wiphy_radio_cfg *radio_cfg = file->private_data; \ + char buf[buflen]; \ + int res; \ + \ + res = scnprintf(buf, buflen, fmt "\n", ##value); \ + return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ +} \ + \ +static const struct file_operations name## _ops = { \ + .read = name## _read, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +} + DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", wiphy->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", @@ -38,6 +56,9 @@ DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d", DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d", wiphy->retry_long); +DEBUGFS_RADIO_READONLY_FILE(radio_rts_threshold, 20, "%d", + radio_cfg->rts_threshold); + static int ht_print_chan(struct ieee80211_channel *chan, char *buf, int buf_size, int offset) { @@ -100,15 +121,27 @@ static const struct file_operations ht40allow_map_ops = { #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0444, phyd, &rdev->wiphy, &name## _ops) +#define DEBUGFS_RADIO_ADD(name, radio_idx) \ + debugfs_create_file(#name, 0444, radiod, \ + &rdev->wiphy.radio_cfg[radio_idx], \ + &name## _ops) + void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) { struct dentry *phyd = rdev->wiphy.debugfsdir; + struct dentry *radiod; + u8 i; DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); DEBUGFS_ADD(long_retry_limit); DEBUGFS_ADD(ht40allow_map); + + for (i = 0; i < rdev->wiphy.n_radio; i++) { + radiod = rdev->wiphy.radio_cfg[i].radio_debugfsdir; + DEBUGFS_RADIO_ADD(radio_rts_threshold, i); + } } struct debugfs_read_work { From c4b67b514af8c2d73c64b36e0cd99e9b26b9ac82 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 16 Oct 2025 19:40:48 +0800 Subject: [PATCH 262/867] RDMA/hns: Fix recv CQ and QP cache affinity Currently driver enforces affinity between QP 
cache and send CQ cache, which helps improve the performance of sending, but doesn't set affinity with recv CQ cache, resulting in suboptimal performance of receiving. Use one CQ bank per context to ensure the affinity among QP, send CQ and recv CQ. For kernel ULP, CQ bank is fixed to 0. Fixes: 9e03dbea2b06 ("RDMA/hns: Fix CQ and QP cache affinity") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_cq.c | 58 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 4 ++ 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 3a5c93c9fb3e6..6aa82fe9dd3df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ +#include #include #include #include "hns_roce_device.h" @@ -37,6 +38,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +93,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; } -static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, + struct hns_roce_bank *bank, struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return uctx ? 
uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +115,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id; mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid]; id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -396,7 +448,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } - ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 78ee04a48a74a..06832c0ac0556 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -217,6 +217,7 @@ struct hns_roce_ucontext { struct mutex page_mutex; struct hns_user_mmap_entry *db_mmap_entry; u32 config; + u8 cq_bank_id; }; struct hns_roce_pd { @@ -495,6 +496,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; }; struct hns_roce_srq_table { @@ -1305,5 +1307,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, enum hns_roce_mmap_type mmap_type); bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c 
b/drivers/infiniband/hw/hns/hns_roce_main.c index d50f36f8a1107..f3607fe107a7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -425,6 +425,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (ret) goto error_fail_copy_to_udata; + hns_roce_get_cq_bankid_for_uctx(context); + return 0; error_fail_copy_to_udata: @@ -447,6 +449,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + hns_roce_put_cq_bankid_for_uctx(context); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex); From f5a7cbea5411668d429eb4ffe96c4063fe8dac9e Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Thu, 16 Oct 2025 19:40:49 +0800 Subject: [PATCH 263/867] RDMA/hns: Fix the modification of max_send_sge The actual sge number may exceed the value specified in init_attr->cap when HW needs extra sge to enable inline feature. Since these extra sges are not expected by ULP, return the user-specified value to ULP instead of the expanded sge number. 
Fixes: 0c5e259b06a8 ("RDMA/hns: Fix incorrect sge nums calculation") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6ff1b8ce580c5..bdd879ac12dda 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -662,7 +662,6 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } @@ -744,7 +743,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } From fe9622011f955e35ba84d3af7b2f2fed31cf8ca1 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Thu, 16 Oct 2025 19:40:50 +0800 Subject: [PATCH 264/867] RDMA/hns: Fix wrong WQE data when QP wraps around When QP wraps around, WQE data from the previous use at the same position still remains as driver does not clear it. The WQE field layout differs across different opcodes, causing that the fields that are not explicitly assigned for the current opcode retain stale values, and are issued to HW by mistake. Such fields are as follows: * MSG_START_SGE_IDX field in ATOMIC WQE * BLOCK_SIZE and ZBVA fields in FRMR WQE * DirectWQE fields when DirectWQE not used For ATOMIC WQE, always set the latest sge index in MSG_START_SGE_IDX as required by HW. For FRMR WQE and DirectWQE, clear only those unassigned fields instead of the entire WQE to avoid performance penalty. 
Fixes: 68a997c5d28c ("RDMA/hns: Add FRMR support for hip08") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f82bdd46a9174..ab378525b296a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -165,6 +165,8 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); hr_reg_clear(fseg, FRMR_BLK_MODE); + hr_reg_clear(fseg, FRMR_BLOCK_SIZE); + hr_reg_clear(fseg, FRMR_ZBVA); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -339,9 +341,6 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, int j = 0; int i; - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, !!(wr->send_flags & IB_SEND_INLINE)); if (wr->send_flags & IB_SEND_INLINE) @@ -586,6 +585,9 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, (wr->send_flags & IB_SEND_SIGNALED) ? 
1 : 0); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, + curr_idx & (qp->sge.sge_cnt - 1)); + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { if (msg_len != ATOMIC_WR_LEN) @@ -734,6 +736,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + /* RC and UD share the same DirectWQE field layout */ + ((struct hns_roce_v2_rc_send_wqe *)wqe)->byte_4 = 0; + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); From b8c9aab4c738e5e9814915768ac6c184fe36ab93 Mon Sep 17 00:00:00 2001 From: Guofeng Yue Date: Thu, 16 Oct 2025 19:40:51 +0800 Subject: [PATCH 265/867] RDMA/hns: Remove an extra blank line Remove an extra blank line. Signed-off-by: Guofeng Yue Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ab378525b296a..63052c0e76133 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7053,7 +7053,6 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_roce_init; } - handle->priv = hr_dev; return 0; From 2469bb6a6af944755a7d7daf66be90f3b8decbf9 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 27 Oct 2025 09:49:12 +0800 Subject: [PATCH 266/867] Revert "wifi: ath10k: avoid unnecessary wait for service ready message" This reverts commit 51a73f1b2e56b0324b4a3bb8cebc4221b5be4c7a. Although this commit benefits QCA6174, it breaks QCA988x and QCA9984 [1][2]. Since it is not likely to root cause/fix this issue in a short time, revert it to get those chips back. Compile tested only. 
Fixes: 51a73f1b2e56 ("wifi: ath10k: avoid unnecessary wait for service ready message") Link: https://lore.kernel.org/ath10k/6d41bc00602c33ffbf68781f563ff2e6c6915a3e.camel@gmail.com # [1] Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220671 # [2] Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251027-ath10k-revert-polling-first-change-v1-1-89aaf3bcbfa1@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b3b00d324075b..b4aad6604d6d9 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1764,32 +1764,33 @@ void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch, int ath10k_wmi_wait_for_service_ready(struct ath10k *ar) { - unsigned long timeout = jiffies + WMI_SERVICE_READY_TIMEOUT_HZ; unsigned long time_left, i; - /* Sometimes the PCI HIF doesn't receive interrupt - * for the service ready message even if the buffer - * was completed. PCIe sniffer shows that it's - * because the corresponding CE ring doesn't fires - * it. Workaround here by polling CE rings. Since - * the message could arrive at any time, continue - * polling until timeout. - */ - do { + time_left = wait_for_completion_timeout(&ar->wmi.service_ready, + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + /* Sometimes the PCI HIF doesn't receive interrupt + * for the service ready message even if the buffer + * was completed. PCIe sniffer shows that it's + * because the corresponding CE ring doesn't fires + * it. Workaround here by polling CE rings once. 
+ */ + ath10k_warn(ar, "failed to receive service ready completion, polling..\n"); + for (i = 0; i < CE_COUNT; i++) ath10k_hif_send_complete_check(ar, i, 1); - /* The 100 ms granularity is a tradeoff considering scheduler - * overhead and response latency - */ time_left = wait_for_completion_timeout(&ar->wmi.service_ready, - msecs_to_jiffies(100)); - if (time_left) - return 0; - } while (time_before(jiffies, timeout)); + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + ath10k_warn(ar, "polling timed out\n"); + return -ETIMEDOUT; + } + + ath10k_warn(ar, "service ready completion received, continuing normally\n"); + } - ath10k_warn(ar, "failed to receive service ready completion\n"); - return -ETIMEDOUT; + return 0; } int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar) From 82cb5be6ad64198a3a028aeb49dcc7f6224d558a Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Wed, 22 Oct 2025 10:19:36 +1000 Subject: [PATCH 267/867] net/tls: support setting the maximum payload size During a handshake, an endpoint may specify a maximum record size limit. Currently, the kernel defaults to TLS_MAX_PAYLOAD_SIZE (16KB) for the maximum record size. Meaning that, the outgoing records from the kernel can exceed a lower size negotiated during the handshake. In such a case, the TLS endpoint must send a fatal "record_overflow" alert [1], and thus the record is discarded. Upcoming Western Digital NVMe-TCP hardware controllers implement TLS support. For these devices, supporting TLS record size negotiation is necessary because the maximum TLS record size supported by the controller is less than the default 16KB currently used by the kernel. Currently, there is no way to inform the kernel of such a limit. This patch adds support to a new setsockopt() option `TLS_TX_MAX_PAYLOAD_LEN` that allows for setting the maximum plaintext fragment size. Once set, outgoing records are no larger than the size specified. This option can be used to specify the record size limit. 
[1] https://www.rfc-editor.org/rfc/rfc8449 Signed-off-by: Wilfred Mallawa Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251022001937.20155-1-wilfred.opensource@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/tls.rst | 20 ++++++++++ include/net/tls.h | 3 ++ include/uapi/linux/tls.h | 2 + net/tls/tls_device.c | 2 +- net/tls/tls_main.c | 64 ++++++++++++++++++++++++++++++++ net/tls/tls_sw.c | 2 +- 6 files changed, 91 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index 36cc7afc2527d..980c442d7161a 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -280,6 +280,26 @@ If the record decrypted turns out to had been padded or is not a data record it will be decrypted again into a kernel buffer without zero copy. Such events are counted in the ``TlsDecryptRetry`` statistic. +TLS_TX_MAX_PAYLOAD_LEN +~~~~~~~~~~~~~~~~~~~~~~ + +Specifies the maximum size of the plaintext payload for transmitted TLS records. + +When this option is set, the kernel enforces the specified limit on all outgoing +TLS records. No plaintext fragment will exceed this size. This option can be used +to implement the TLS Record Size Limit extension [1]. + +* For TLS 1.2, the value corresponds directly to the record size limit. +* For TLS 1.3, the value should be set to record_size_limit - 1, since + the record size limit includes one additional byte for the ContentType + field. + +The valid range for this option is 64 to 16384 bytes for TLS 1.2, and 63 to +16384 bytes for TLS 1.3. The lower minimum for TLS 1.3 accounts for the +extra byte used by the ContentType field. 
+ +[1] https://datatracker.ietf.org/doc/html/rfc8449 + Statistics ========== diff --git a/include/net/tls.h b/include/net/tls.h index 857340338b694..f2af113728aae 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -53,6 +53,8 @@ struct tls_rec; /* Maximum data size carried in a TLS record */ #define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) +/* Minimum record size limit as per RFC8449 */ +#define TLS_MIN_RECORD_SIZE_LIM ((size_t)1 << 6) #define TLS_HEADER_SIZE 5 #define TLS_NONCE_OFFSET TLS_HEADER_SIZE @@ -226,6 +228,7 @@ struct tls_context { u8 rx_conf:3; u8 zerocopy_sendfile:1; u8 rx_no_pad:1; + u16 tx_max_payload_len; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index b66a800389cc0..b8b9c42f848c8 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -41,6 +41,7 @@ #define TLS_RX 2 /* Set receive parameters */ #define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ #define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */ +#define TLS_TX_MAX_PAYLOAD_LEN 5 /* Maximum plaintext size */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -194,6 +195,7 @@ enum { TLS_INFO_RXCONF, TLS_INFO_ZC_RO_TX, TLS_INFO_RX_NO_PAD, + TLS_INFO_TX_MAX_PAYLOAD_LEN, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index caa2b5d246223..4d29b390aed90 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -462,7 +462,7 @@ static int tls_push_data(struct sock *sk, /* TLS_HEADER_SIZE is not counted as part of the TLS record, and * we need to leave room for an authentication tag. 
*/ - max_open_record_len = TLS_MAX_PAYLOAD_SIZE + + max_open_record_len = tls_ctx->tx_max_payload_len + prot->prepend_size; do { rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 39a2ab47fe720..56ce0bc8317b1 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -541,6 +541,28 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, return 0; } +static int do_tls_getsockopt_tx_payload_len(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + u16 payload_len = ctx->tx_max_payload_len; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < sizeof(payload_len)) + return -EINVAL; + + if (put_user(sizeof(payload_len), optlen)) + return -EFAULT; + + if (copy_to_user(optval, &payload_len, sizeof(payload_len))) + return -EFAULT; + + return 0; +} + static int do_tls_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { @@ -560,6 +582,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, case TLS_RX_EXPECT_NO_PAD: rc = do_tls_getsockopt_no_pad(sk, optval, optlen); break; + case TLS_TX_MAX_PAYLOAD_LEN: + rc = do_tls_getsockopt_tx_payload_len(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -809,6 +834,32 @@ static int do_tls_setsockopt_no_pad(struct sock *sk, sockptr_t optval, return rc; } +static int do_tls_setsockopt_tx_payload_len(struct sock *sk, sockptr_t optval, + unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *sw_ctx = tls_sw_ctx_tx(ctx); + u16 value; + bool tls_13 = ctx->prot_info.version == TLS_1_3_VERSION; + + if (sw_ctx && sw_ctx->open_rec) + return -EBUSY; + + if (sockptr_is_null(optval) || optlen != sizeof(value)) + return -EINVAL; + + if (copy_from_sockptr(&value, optval, sizeof(value))) + return -EFAULT; + + if (value < TLS_MIN_RECORD_SIZE_LIM - (tls_13 ? 
1 : 0) || + value > TLS_MAX_PAYLOAD_SIZE) + return -EINVAL; + + ctx->tx_max_payload_len = value; + + return 0; +} + static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { @@ -830,6 +881,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, case TLS_RX_EXPECT_NO_PAD: rc = do_tls_setsockopt_no_pad(sk, optval, optlen); break; + case TLS_TX_MAX_PAYLOAD_LEN: + lock_sock(sk); + rc = do_tls_setsockopt_tx_payload_len(sk, optval, optlen); + release_sock(sk); + break; default: rc = -ENOPROTOOPT; break; @@ -1019,6 +1075,7 @@ static int tls_init(struct sock *sk) ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; + ctx->tx_max_payload_len = TLS_MAX_PAYLOAD_SIZE; update_sk_prot(sk, ctx); out: write_unlock_bh(&sk->sk_callback_lock); @@ -1108,6 +1165,12 @@ static int tls_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) goto nla_failure; } + err = nla_put_u16(skb, TLS_INFO_TX_MAX_PAYLOAD_LEN, + ctx->tx_max_payload_len); + + if (err) + goto nla_failure; + rcu_read_unlock(); nla_nest_end(skb, start); return 0; @@ -1129,6 +1192,7 @@ static size_t tls_get_info_size(const struct sock *sk, bool net_admin) nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */ nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_TX_MAX_PAYLOAD_LEN */ 0; return size; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d171353699800..9937d4c810f2b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1079,7 +1079,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, orig_size = msg_pl->sg.size; full_record = false; try_to_copy = msg_data_left(msg); - record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size; + record_room = tls_ctx->tx_max_payload_len - msg_pl->sg.size; if (try_to_copy >= record_room) { try_to_copy = record_room; full_record = true; From 5f30bc470672f7b38a60d6641d519f308723085c Mon Sep 17 00:00:00 2001 From: 
Wilfred Mallawa Date: Wed, 22 Oct 2025 10:19:37 +1000 Subject: [PATCH 268/867] selftests: tls: add tls record_size_limit test Test that outgoing plaintext records respect the tls TLS_TX_MAX_PAYLOAD_LEN set using setsockopt(). The limit is set to be 128, thus, in all received records, the plaintext must not exceed this amount. Also test that setting a new record size limit whilst a pending open record exists is handled correctly by discarding the request. Suggested-by: Sabrina Dubroca Signed-off-by: Wilfred Mallawa Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251022001937.20155-2-wilfred.opensource@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 141 ++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5c6d8215021c9..da1b50b307194 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2856,6 +2856,147 @@ TEST_F(tls_err, oob_pressure) EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); } +/* + * Parse a stream of TLS records and ensure that each record respects + * the specified @max_payload_len. 
+ */ +static size_t parse_tls_records(struct __test_metadata *_metadata, + const __u8 *rx_buf, int rx_len, int overhead, + __u16 max_payload_len) +{ + const __u8 *rec = rx_buf; + size_t total_plaintext_rx = 0; + const __u8 rec_header_len = 5; + + while (rec < rx_buf + rx_len) { + __u16 record_payload_len; + __u16 plaintext_len; + + /* Sanity check that it's a TLS header for application data */ + ASSERT_EQ(rec[0], 23); + ASSERT_EQ(rec[1], 0x3); + ASSERT_EQ(rec[2], 0x3); + + memcpy(&record_payload_len, rec + 3, 2); + record_payload_len = ntohs(record_payload_len); + ASSERT_GE(record_payload_len, overhead); + + plaintext_len = record_payload_len - overhead; + total_plaintext_rx += plaintext_len; + + /* Plaintext must not exceed the specified limit */ + ASSERT_LE(plaintext_len, max_payload_len); + rec += rec_header_len + record_payload_len; + } + + return total_plaintext_rx; +} + +TEST(tls_12_tx_max_payload_len) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 limit = 128; + __u16 opt = 0; + unsigned int optlen = sizeof(opt); + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, 0); + + ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen); + EXPECT_EQ(ret, 0); + EXPECT_EQ(limit, opt); + EXPECT_EQ(optlen, sizeof(limit)); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx)); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + 
total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + limit); + + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + +TEST(tls_12_tx_max_payload_len_open_rec) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 tx_partial = 256; + __u16 og_limit = 512, limit = 128; + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit, + sizeof(og_limit)); + ASSERT_EQ(ret, 0); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial); + + /* + * Changing the payload limit with a pending open record should + * not be allowed. + */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBUSY); + + ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR), + sizeof(tx) - tx_partial); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + og_limit); + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; From 68800bbf583f26f71491141e4b3c8582f9cfcbde Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 23 Oct 2025 16:45:37 +0200 Subject: [PATCH 269/867] net: bridge: Flush multicast groups when snooping is disabled When forwarding multicast packets, the bridge takes MDB into account when IGMP / MLD snooping is enabled. 
Currently, when snooping is disabled, the MDB is retained, even though it is not used anymore. At the same time, during the time that snooping is disabled, the IGMP / MLD control packets are obviously ignored, and after the snooping is reenabled, the administrator has to assume it is out of sync. In particular, missed join and leave messages would lead to traffic being forwarded to wrong interfaces. Keeping the MDB entries around thus serves no purpose, and just takes memory. Note also that disabling per-VLAN snooping does actually flush the relevant MDB entries. This patch flushes non-permanent MDB entries as global snooping is disabled. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/5e992df1bb93b88e19c0ea5819e23b669e3dde5d.1761228273.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22d12e5459668..d55a4ab87837f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4649,6 +4649,14 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, rcu_read_unlock(); } +static void br_multicast_del_grps(struct net_bridge *br) +{ + struct net_bridge_port *port; + + list_for_each_entry(port, &br->port_list, list) + __br_multicast_disable_port_ctx(&port->multicast_ctx); +} + int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { @@ -4669,6 +4677,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { change_snoopers = true; + br_multicast_del_grps(br); goto unlock; } From d10920607ffedc6b8d27c7483d9dfe0cff0b2fa8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 23 Oct 2025 16:45:38 +0200 Subject: [PATCH 270/867] selftests: bridge_mdb: Add a test for MDB flush on 
snooping disable Check that non-permanent MDB entries are removed as IGMP / MLD snooping is disabled. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/9420dfbcf26c8e1134d31244e9e7d6a49d677a69.1761228273.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_mdb.sh | 100 +++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index 8c1597ebc2d38..e86d779465853 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -28,6 +28,7 @@ ALL_TESTS=" cfg_test fwd_test ctrl_test + disable_test " NUM_NETIFS=4 @@ -64,7 +65,10 @@ h2_destroy() switch_create() { - ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + local vlan_filtering=$1; shift + + ip link add name br0 type bridge \ + vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \ mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 bridge vlan add vid 10 dev br0 self bridge vlan add vid 20 dev br0 self @@ -118,7 +122,7 @@ setup_prepare() h1_create h2_create - switch_create + switch_create 1 } cleanup() @@ -1357,6 +1361,98 @@ ctrl_test() ctrl_mldv2_is_in_test } +check_group() +{ + local group=$1; shift + local vid=$1; shift + local should_fail=$1; shift + local when=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null | + grep -q "port $swp1" + check_err_fail "$should_fail" $? 
"$group seen $when snooping disable:" +} + +__disable_test() +{ + local vid=$1; shift + local what=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + RET=0 + + bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1 + bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp ff0e::3 \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2 + bridge mdb add dev br0 port "$swp1" grp ff0e::4 \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.1 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.2 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \ + "${vidkws[@]}" filter_mode exclude + + check_group ff0e::1 "$vid" 0 "before" + check_group ff0e::2 "$vid" 0 "before" + check_group ff0e::3 "$vid" 0 "before" + check_group ff0e::4 "$vid" 0 "before" + + check_group 239.1.1.1 "$vid" 0 "before" + check_group 239.1.1.2 "$vid" 0 "before" + check_group 239.1.1.3 "$vid" 0 "before" + check_group 239.1.1.4 "$vid" 0 "before" + + ip link set dev br0 type bridge mcast_snooping 0 + + check_group ff0e::1 "$vid" 0 "after" + check_group ff0e::2 "$vid" 0 "after" + check_group ff0e::3 "$vid" 1 "after" + check_group ff0e::4 "$vid" 1 "after" + + check_group 239.1.1.1 "$vid" 0 "after" + check_group 239.1.1.2 "$vid" 0 "after" + check_group 239.1.1.3 "$vid" 1 "after" + check_group 239.1.1.4 "$vid" 1 "after" + + log_test "$what: Flush after disable" + + ip link set dev br0 type bridge mcast_snooping 1 + sleep 10 +} + +disable_test() +{ + __disable_test 10 802.1q + + switch_destroy + switch_create 0 + setup_wait + + __disable_test 0 802.1d + + 
switch_destroy + switch_create 1 + setup_wait +} + if ! bridge mdb help 2>&1 | grep -q "flush"; then echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip From 1bc80d673087e5704adbb3ee8e4b785c14899cce Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 23 Oct 2025 21:13:49 +0200 Subject: [PATCH 271/867] phy: mscc: Use PHY_ID_MATCH_EXACT for VSC8584, VSC8582, VSC8575, VSC856X As the PHYs VSC8584, VSC8582, VSC8575 and VSC856X exist only as rev B, we can use PHY_ID_MATCH_EXACT to match exactly on revision B of the PHY. Because of this change, there is no need to check whether it is a different revision than rev B in the function vsc8584_probe(), as we already know that this will never happen. These changes are a preparation for the next patch, because in that patch we will make the PHYs VSC8574 and VSC8572 use vsc8584_probe(), and these PHYs have multiple revisions. Reviewed-by: Maxime Chevallier Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20251023191350.190940-2-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc.h | 8 ++++---- drivers/net/phy/mscc/mscc_main.c | 23 ++++------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 2d8eca54c40a2..2eef5956b9cc5 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -289,12 +289,12 @@ enum rgmii_clock_delay { #define PHY_ID_VSC8540 0x00070760 #define PHY_ID_VSC8541 0x00070770 #define PHY_ID_VSC8552 0x000704e0 -#define PHY_ID_VSC856X 0x000707e0 +#define PHY_ID_VSC856X 0x000707e1 #define PHY_ID_VSC8572 0x000704d0 #define PHY_ID_VSC8574 0x000704a0 -#define PHY_ID_VSC8575 0x000707d0 -#define PHY_ID_VSC8582 0x000707b0 -#define PHY_ID_VSC8584 0x000707c0 +#define PHY_ID_VSC8575 0x000707d1 +#define PHY_ID_VSC8582 0x000707b1 +#define PHY_ID_VSC8584 0x000707c1 #define PHY_VENDOR_MSCC 0x00070400 #define MSCC_VDDMAC_1500 1500 diff --git 
a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index ef0ef1570d392..9343ed3b000d4 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1724,12 +1724,6 @@ static int vsc8584_config_init(struct phy_device *phydev) * in this pre-init function. */ if (phy_package_init_once(phydev)) { - /* The following switch statement assumes that the lowest - * nibble of the phy_id_mask is always 0. This works because - * the lowest nibble of the PHY_ID's below are also 0. - */ - WARN_ON(phydev->drv->phy_id_mask & 0xf); - switch (phydev->phy_id & phydev->drv->phy_id_mask) { case PHY_ID_VSC8504: case PHY_ID_VSC8552: @@ -2290,11 +2284,6 @@ static int vsc8584_probe(struct phy_device *phydev) VSC8531_DUPLEX_COLLISION}; int ret; - if ((phydev->phy_id & MSCC_DEV_REV_MASK) != VSC8584_REVB) { - dev_err(&phydev->mdio.dev, "Only VSC8584 revB is supported.\n"); - return -ENOTSUPP; - } - vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); if (!vsc8531) return -ENOMEM; @@ -2587,9 +2576,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_inband = vsc85xx_config_inband, }, { - .phy_id = PHY_ID_VSC856X, + PHY_ID_MATCH_EXACT(PHY_ID_VSC856X), .name = "Microsemi GE VSC856X SyncE", - .phy_id_mask = 0xfffffff0, /* PHY_GBIT_FEATURES */ .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, @@ -2667,9 +2655,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_inband = vsc85xx_config_inband, }, { - .phy_id = PHY_ID_VSC8575, + PHY_ID_MATCH_EXACT(PHY_ID_VSC8575), .name = "Microsemi GE VSC8575 SyncE", - .phy_id_mask = 0xfffffff0, /* PHY_GBIT_FEATURES */ .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, @@ -2693,9 +2680,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_inband = vsc85xx_config_inband, }, { - .phy_id = PHY_ID_VSC8582, + PHY_ID_MATCH_EXACT(PHY_ID_VSC8582), .name = "Microsemi GE VSC8582 SyncE", - .phy_id_mask = 0xfffffff0, /* PHY_GBIT_FEATURES */ .soft_reset = 
&genphy_soft_reset, .config_init = &vsc8584_config_init, @@ -2719,9 +2705,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_inband = vsc85xx_config_inband, }, { - .phy_id = PHY_ID_VSC8584, + PHY_ID_MATCH_EXACT(PHY_ID_VSC8584), .name = "Microsemi GE VSC8584 SyncE", - .phy_id_mask = 0xfffffff0, /* PHY_GBIT_FEATURES */ .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, From ea5df88aeca112aac69e6c32e3dd1433a113b0c9 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 23 Oct 2025 21:13:50 +0200 Subject: [PATCH 272/867] phy: mscc: Fix PTP for VSC8574 and VSC8572 The PTP initialization is two-step. The first part is the functions vsc8584_ptp_probe_once() and vsc8584_ptp_probe() at probe time, which initialize the locks and queues and create the PTP device. The second part is the function vsc8584_ptp_init() at config_init() time, which initializes PTP in the HW. For VSC8574 and VSC8572, the PTP initialization is incomplete. It is missing the first part but it performs the second part, meaning that ptp_clock_register() is never called. There is no crash without the first part when enabling PTP, but this is unexpected because some PHYs have PTP functionality exposed by the driver and some don't, even though they share the same PTP clock. 
Fixes: 774626fa440e ("net: phy: mscc: Add PTP support for 2 more VSC PHYs") Reviewed-by: Maxime Chevallier Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20251023191350.190940-3-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 9343ed3b000d4..8678ebf89cca5 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2613,7 +2613,7 @@ static struct phy_driver vsc85xx_driver[] = { .suspend = &genphy_suspend, .resume = &genphy_resume, .remove = &vsc85xx_remove, - .probe = &vsc8574_probe, + .probe = &vsc8584_probe, .set_wol = &vsc85xx_wol_set, .get_wol = &vsc85xx_wol_get, .get_tunable = &vsc85xx_get_tunable, @@ -2636,12 +2636,12 @@ static struct phy_driver vsc85xx_driver[] = { .config_aneg = &vsc85xx_config_aneg, .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, - .handle_interrupt = vsc85xx_handle_interrupt, + .handle_interrupt = vsc8584_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, .remove = &vsc85xx_remove, - .probe = &vsc8574_probe, + .probe = &vsc8584_probe, .set_wol = &vsc85xx_wol_set, .get_wol = &vsc85xx_wol_get, .get_tunable = &vsc85xx_get_tunable, From 05e090620bacf317020f9591cfff8926093380bd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 24 Oct 2025 14:23:35 +0300 Subject: [PATCH 273/867] net: airoha: Fix a copy and paste bug in probe() This code has a copy and paste bug where it accidentally checks "if (err)" instead of checking if "xsi_rsts" is NULL. Also, as a free bonus, I changed the allocation from kzalloc() to kcalloc() which is a kernel hardening measure to protect against integer overflows. 
Fixes: 5863b4e065e2 ("net: airoha: Add airoha_eth_soc_data struct") Signed-off-by: Dan Carpenter Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/aPtht6y5DRokn9zv@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index e17a285a9e8fa..688faf999e4c0 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2985,11 +2985,11 @@ static int airoha_probe(struct platform_device *pdev) return err; } - xsi_rsts = devm_kzalloc(eth->dev, - eth->soc->num_xsi_rsts * sizeof(*xsi_rsts), + xsi_rsts = devm_kcalloc(eth->dev, + eth->soc->num_xsi_rsts, sizeof(*xsi_rsts), GFP_KERNEL); - if (err) - return err; + if (!xsi_rsts) + return -ENOMEM; eth->xsi_rsts = xsi_rsts; for (i = 0; i < eth->soc->num_xsi_rsts; i++) From a5c12b060efe7a7830effcd233af6b2973176626 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 23 Oct 2025 22:04:52 +0000 Subject: [PATCH 274/867] octeontx2: convert to ndo_hwtstamp API Convert driver to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. otx2_ioctl() becomes empty, remove it. 
Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251023220457.3201122-2-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../marvell/octeontx2/nic/otx2_common.h | 9 ++- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 56 +++++++++---------- .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 3 +- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 1c8a3c078a647..ec26d1b6c789f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -527,7 +527,7 @@ struct otx2_nic { u32 nix_lmt_size; struct otx2_ptp *ptp; - struct hwtstamp_config tstamp; + struct kernel_hwtstamp_config tstamp; unsigned long rq_bmap; @@ -1098,8 +1098,11 @@ int otx2_open(struct net_device *netdev); int otx2_stop(struct net_device *netdev); int otx2_set_real_num_queues(struct net_device *netdev, int tx_queues, int rx_queues); -int otx2_ioctl(struct net_device *netdev, struct ifreq *req, int cmd); -int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr); +int otx2_config_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int otx2_config_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); /* MCAM filter related APIs */ int otx2_mcam_flow_init(struct otx2_nic *pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index e808995703cfd..a7feb4c392b36 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2445,18 +2445,26 @@ static int otx2_config_hw_tx_tstamp(struct otx2_nic *pfvf, bool enable) return 0; } -int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) +int 
otx2_config_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + *config = pfvf->tstamp; + return 0; +} +EXPORT_SYMBOL(otx2_config_hwtstamp_get); + +int otx2_config_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct otx2_nic *pfvf = netdev_priv(netdev); - struct hwtstamp_config config; if (!pfvf->ptp) return -ENODEV; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: if (pfvf->flags & OTX2_FLAG_PTP_ONESTEP_SYNC) pfvf->flags &= ~OTX2_FLAG_PTP_ONESTEP_SYNC; @@ -2465,8 +2473,11 @@ int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) otx2_config_hw_tx_tstamp(pfvf, false); break; case HWTSTAMP_TX_ONESTEP_SYNC: - if (!test_bit(CN10K_PTP_ONESTEP, &pfvf->hw.cap_flag)) + if (!test_bit(CN10K_PTP_ONESTEP, &pfvf->hw.cap_flag)) { + NL_SET_ERR_MSG_MOD(extack, + "One-step time stamping is not supported"); return -ERANGE; + } pfvf->flags |= OTX2_FLAG_PTP_ONESTEP_SYNC; schedule_delayed_work(&pfvf->ptp->synctstamp_work, msecs_to_jiffies(500)); @@ -2478,7 +2489,7 @@ int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: otx2_config_hw_rx_tstamp(pfvf, false); break; @@ -2497,35 +2508,17 @@ int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: otx2_config_hw_rx_tstamp(pfvf, true); - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } - memcpy(&pfvf->tstamp, &config, sizeof(config)); + pfvf->tstamp = *config; - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? 
-EFAULT : 0; -} -EXPORT_SYMBOL(otx2_config_hwtstamp); - -int otx2_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) -{ - struct otx2_nic *pfvf = netdev_priv(netdev); - struct hwtstamp_config *cfg = &pfvf->tstamp; - - switch (cmd) { - case SIOCSHWTSTAMP: - return otx2_config_hwtstamp(netdev, req); - case SIOCGHWTSTAMP: - return copy_to_user(req->ifr_data, cfg, - sizeof(*cfg)) ? -EFAULT : 0; - default: - return -EOPNOTSUPP; - } + return 0; } -EXPORT_SYMBOL(otx2_ioctl); +EXPORT_SYMBOL(otx2_config_hwtstamp_set); static int otx2_do_set_vf_mac(struct otx2_nic *pf, int vf, const u8 *mac) { @@ -2942,7 +2935,6 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_set_features = otx2_set_features, .ndo_tx_timeout = otx2_tx_timeout, .ndo_get_stats64 = otx2_get_stats64, - .ndo_eth_ioctl = otx2_ioctl, .ndo_set_vf_mac = otx2_set_vf_mac, .ndo_set_vf_vlan = otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, @@ -2951,6 +2943,8 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_xdp_xmit = otx2_xdp_xmit, .ndo_setup_tc = otx2_setup_tc, .ndo_set_vf_trust = otx2_ndo_set_vf_trust, + .ndo_hwtstamp_get = otx2_config_hwtstamp_get, + .ndo_hwtstamp_set = otx2_config_hwtstamp_set, }; int otx2_wq_init(struct otx2_nic *pf) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 25381f079b97d..f4fdbfba86676 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -534,8 +534,9 @@ static const struct net_device_ops otx2vf_netdev_ops = { .ndo_set_features = otx2vf_set_features, .ndo_get_stats64 = otx2_get_stats64, .ndo_tx_timeout = otx2_tx_timeout, - .ndo_eth_ioctl = otx2_ioctl, .ndo_setup_tc = otx2_setup_tc, + .ndo_hwtstamp_get = otx2_config_hwtstamp_get, + .ndo_hwtstamp_set = otx2_config_hwtstamp_set, }; static int otx2_vf_wq_init(struct otx2_nic *vf) From 7a07dc723fadadb1680f6353954a965ae6e75862 Mon Sep 17 00:00:00 
2001 From: Vadim Fedorenko Date: Thu, 23 Oct 2025 22:04:53 +0000 Subject: [PATCH 275/867] mlx4: convert to ndo_hwtstamp API Convert driver to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. mlx4_en_ioctl() becomes empty, remove it. Reviewed-by: Jacob Keller Reviewed-by: Tariq Toukan Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251023220457.3201122-3-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 62 ++++++++----------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 +- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 308b4458e0d44..81bf8908b8974 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2420,21 +2420,22 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +static int mlx4_en_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - struct hwtstamp_config config; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; /* device doesn't support time stamping */ - if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)) + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)) { + NL_SET_ERR_MSG_MOD(extack, + "device doesn't support time stamping"); return -EINVAL; + } /* TX HW timestamp */ - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: break; @@ -2443,7 +2444,7 @@ static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) } /* RX HW timestamp */ - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case 
HWTSTAMP_FILTER_ALL: @@ -2461,39 +2462,27 @@ static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } if (mlx4_en_reset_config(dev, config, dev->features)) { - config.tx_type = HWTSTAMP_TX_OFF; - config.rx_filter = HWTSTAMP_FILTER_NONE; + config->tx_type = HWTSTAMP_TX_OFF; + config->rx_filter = HWTSTAMP_FILTER_NONE; } - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? -EFAULT : 0; + return 0; } -static int mlx4_en_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +static int mlx4_en_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { struct mlx4_en_priv *priv = netdev_priv(dev); - return copy_to_user(ifr->ifr_data, &priv->hwtstamp_config, - sizeof(priv->hwtstamp_config)) ? -EFAULT : 0; -} - -static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCSHWTSTAMP: - return mlx4_en_hwtstamp_set(dev, ifr); - case SIOCGHWTSTAMP: - return mlx4_en_hwtstamp_get(dev, ifr); - default: - return -EOPNOTSUPP; - } + *config = priv->hwtstamp_config; + return 0; } static netdev_features_t mlx4_en_fix_features(struct net_device *netdev, @@ -2560,7 +2549,7 @@ static int mlx4_en_set_features(struct net_device *netdev, } if (reset) { - ret = mlx4_en_reset_config(netdev, priv->hwtstamp_config, + ret = mlx4_en_reset_config(netdev, &priv->hwtstamp_config, features); if (ret) return ret; @@ -2844,7 +2833,6 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = mlx4_en_change_mtu, - .ndo_eth_ioctl = mlx4_en_ioctl, .ndo_tx_timeout = mlx4_en_tx_timeout, .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, @@ -2858,6 
+2846,8 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, .ndo_bpf = mlx4_xdp, + .ndo_hwtstamp_get = mlx4_en_hwtstamp_get, + .ndo_hwtstamp_set = mlx4_en_hwtstamp_set, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -3512,7 +3502,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } int mlx4_en_reset_config(struct net_device *dev, - struct hwtstamp_config ts_config, + struct kernel_hwtstamp_config *ts_config, netdev_features_t features) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -3522,8 +3512,8 @@ int mlx4_en_reset_config(struct net_device *dev, int port_up = 0; int err = 0; - if (priv->hwtstamp_config.tx_type == ts_config.tx_type && - priv->hwtstamp_config.rx_filter == ts_config.rx_filter && + if (priv->hwtstamp_config.tx_type == ts_config->tx_type && + priv->hwtstamp_config.rx_filter == ts_config->rx_filter && !DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX) && !DEV_FEATURE_CHANGED(dev, features, NETIF_F_RXFCS)) return 0; /* Nothing to change */ @@ -3542,7 +3532,7 @@ int mlx4_en_reset_config(struct net_device *dev, mutex_lock(&mdev->state_lock); memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); - memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config)); + memcpy(&new_prof.hwtstamp_config, ts_config, sizeof(*ts_config)); err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) @@ -3560,7 +3550,7 @@ int mlx4_en_reset_config(struct net_device *dev, dev->features |= NETIF_F_HW_VLAN_CTAG_RX; else dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; - } else if (ts_config.rx_filter == HWTSTAMP_FILTER_NONE) { + } else if (ts_config->rx_filter == HWTSTAMP_FILTER_NONE) { /* RX time-stamping is OFF, update the RX vlan offload * to the latest wanted state */ @@ -3581,7 +3571,7 @@ int mlx4_en_reset_config(struct net_device *dev, * Regardless of the caller's choice, * Turn Off RX vlan offload in 
case of time-stamping is ON */ - if (ts_config.rx_filter != HWTSTAMP_FILTER_NONE) { + if (ts_config->rx_filter != HWTSTAMP_FILTER_NONE) { if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) en_warn(priv, "Turning off RX vlan offload since RX time-stamping is ON\n"); dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index ad0d91a751848..aab97694f86b3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -388,7 +388,7 @@ struct mlx4_en_port_profile { u8 num_up; int rss_rings; int inline_thold; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; }; struct mlx4_en_profile { @@ -612,7 +612,7 @@ struct mlx4_en_priv { bool wol; struct device *ddev; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB @@ -780,7 +780,7 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); int mlx4_en_moderation_update(struct mlx4_en_priv *priv); int mlx4_en_reset_config(struct net_device *dev, - struct hwtstamp_config ts_config, + struct kernel_hwtstamp_config *ts_config, netdev_features_t new_features); void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev, struct mlx4_en_stats_bitmap *stats_bitmap, From 38efb0ba3cd07c3f144ef80895966debeee3f60e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 23 Oct 2025 22:04:54 +0000 Subject: [PATCH 276/867] ionic: convert to ndo_hwtstamp API Convert driver to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. ionic_eth_ioctl() becomes empty, remove it. 
Reviewed-by: Jacob Keller Reviewed-by: Brett Creeley Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251023220457.3201122-4-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/pensando/ionic/ionic_lif.c | 17 +----- .../net/ethernet/pensando/ionic/ionic_lif.h | 18 ++++-- .../net/ethernet/pensando/ionic/ionic_phc.c | 61 ++++++++++++------- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index b28966ae50c22..058eea86e141c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2335,20 +2335,6 @@ static int ionic_stop(struct net_device *netdev) return 0; } -static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct ionic_lif *lif = netdev_priv(netdev); - - switch (cmd) { - case SIOCSHWTSTAMP: - return ionic_lif_hwstamp_set(lif, ifr); - case SIOCGHWTSTAMP: - return ionic_lif_hwstamp_get(lif, ifr); - default: - return -EOPNOTSUPP; - } -} - static int ionic_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivf) { @@ -2812,7 +2798,6 @@ static int ionic_xdp(struct net_device *netdev, struct netdev_bpf *bpf) static const struct net_device_ops ionic_netdev_ops = { .ndo_open = ionic_open, .ndo_stop = ionic_stop, - .ndo_eth_ioctl = ionic_eth_ioctl, .ndo_start_xmit = ionic_start_xmit, .ndo_bpf = ionic_xdp, .ndo_xdp_xmit = ionic_xdp_xmit, @@ -2833,6 +2818,8 @@ static const struct net_device_ops ionic_netdev_ops = { .ndo_get_vf_config = ionic_get_vf_config, .ndo_set_vf_link_state = ionic_set_vf_link_state, .ndo_get_vf_stats = ionic_get_vf_stats, + .ndo_hwtstamp_get = ionic_hwstamp_get, + .ndo_hwtstamp_set = ionic_hwstamp_set, }; static int ionic_cmb_reconfig(struct ionic_lif *lif, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 
43bdd0fb87336..8e10f66dc50e9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include "ionic_rx_filter.h" @@ -254,7 +254,7 @@ struct ionic_phc { struct timecounter tc; struct mutex config_lock; /* lock for ts_config */ - struct hwtstamp_config ts_config; + struct kernel_hwtstamp_config ts_config; u64 ts_config_rx_filt; u32 ts_config_tx_mode; @@ -362,8 +362,11 @@ int ionic_lif_size(struct ionic *ionic); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) void ionic_lif_hwstamp_replay(struct ionic_lif *lif); void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif); -int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); -int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); +int ionic_hwstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); +int ionic_hwstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); void ionic_lif_register_phc(struct ionic_lif *lif); void ionic_lif_unregister_phc(struct ionic_lif *lif); @@ -373,12 +376,15 @@ void ionic_lif_free_phc(struct ionic_lif *lif); static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {} static inline void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif) {} -static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) +static inline int ionic_hwstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static inline int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) +static inline int ionic_hwstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c 
b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index 9f5c81d44f995..05b44fc482f87 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -65,11 +65,12 @@ static u64 ionic_hwstamp_rx_filt(int config_rx_filter) } static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, - struct hwtstamp_config *new_ts) + struct kernel_hwtstamp_config *new_ts, + struct netlink_ext_ack *extack) { + struct kernel_hwtstamp_config *config; + struct kernel_hwtstamp_config ts = {}; struct ionic *ionic = lif->ionic; - struct hwtstamp_config *config; - struct hwtstamp_config ts; int tx_mode = 0; u64 rx_filt = 0; int err, err2; @@ -99,12 +100,16 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, tx_mode = ionic_hwstamp_tx_mode(config->tx_type); if (tx_mode < 0) { + NL_SET_ERR_MSG_MOD(extack, + "TX time stamping mode isn't supported"); err = tx_mode; goto err_queues; } mask = cpu_to_le64(BIT_ULL(tx_mode)); if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask) { + NL_SET_ERR_MSG_MOD(extack, + "TX time stamping mode isn't supported"); err = -ERANGE; goto err_queues; } @@ -124,32 +129,47 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, if (tx_mode) { err = ionic_lif_create_hwstamp_txq(lif); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error creating TX timestamp queue"); goto err_queues; + } } if (rx_filt) { err = ionic_lif_create_hwstamp_rxq(lif); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error creating RX timestamp queue"); goto err_queues; + } } if (tx_mode != lif->phc->ts_config_tx_mode) { err = ionic_lif_set_hwstamp_txmode(lif, tx_mode); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error enabling TX timestamp mode"); goto err_txmode; + } } if (rx_filt != lif->phc->ts_config_rx_filt) { err = ionic_lif_set_hwstamp_rxfilt(lif, rx_filt); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error enabling RX timestamp mode"); goto err_rxfilt; 
+ } } if (rx_all != (lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)) { err = ionic_lif_config_hwstamp_rxq_all(lif, rx_all); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error enabling RX timestamp mode"); goto err_rxall; + } } memcpy(&lif->phc->ts_config, config, sizeof(*config)); @@ -183,28 +203,24 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, return err; } -int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) +int ionic_hwstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; + struct ionic_lif *lif = netdev_priv(netdev); int err; if (!lif->phc || !lif->phc->ptp) return -EOPNOTSUPP; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - mutex_lock(&lif->queue_lock); - err = ionic_lif_hwstamp_set_ts_config(lif, &config); + err = ionic_lif_hwstamp_set_ts_config(lif, config, extack); mutex_unlock(&lif->queue_lock); if (err) { netdev_info(lif->netdev, "hwstamp set failed: %d\n", err); return err; } - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; - return 0; } @@ -216,7 +232,7 @@ void ionic_lif_hwstamp_replay(struct ionic_lif *lif) return; mutex_lock(&lif->queue_lock); - err = ionic_lif_hwstamp_set_ts_config(lif, NULL); + err = ionic_lif_hwstamp_set_ts_config(lif, NULL, NULL); mutex_unlock(&lif->queue_lock); if (err) netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); @@ -246,19 +262,18 @@ void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif) mutex_unlock(&lif->phc->config_lock); } -int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) +int ionic_hwstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; + struct ionic_lif *lif = netdev_priv(netdev); if (!lif->phc || !lif->phc->ptp) return -EOPNOTSUPP; mutex_lock(&lif->phc->config_lock); - memcpy(&config, &lif->phc->ts_config, 
sizeof(config)); + memcpy(config, &lif->phc->ts_config, sizeof(*config)); mutex_unlock(&lif->phc->config_lock); - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; return 0; } From faac57cddfc256f9cba35702b12bcc5cf793db99 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 23 Oct 2025 22:04:55 +0000 Subject: [PATCH 277/867] net: ravb: convert to ndo_hwtstamp API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert driver to use .ndo_hwtstamp_set()/.ndo_hwtstamp_get callbacks. ravb_do_ioctl() becomes pure phy_do_ioctl_running(), remove it and replace in callbacks. Reviewed-by: Niklas Söderlund Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251023220457.3201122-5-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 61 ++++++++---------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c3fc15f9ec852..edfaa58505275 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2410,41 +2410,38 @@ static int ravb_close(struct net_device *ndev) return 0; } -static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req) +static int ravb_hwtstamp_get(struct net_device *ndev, + struct kernel_hwtstamp_config *config) { struct ravb_private *priv = netdev_priv(ndev); - struct hwtstamp_config config; - config.flags = 0; - config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : - HWTSTAMP_TX_OFF; + config->flags = 0; + config->tx_type = priv->tstamp_tx_ctrl ? 
HWTSTAMP_TX_ON : + HWTSTAMP_TX_OFF; switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) { case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT: - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; case RAVB_RXTSTAMP_TYPE_ALL: - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: - config.rx_filter = HWTSTAMP_FILTER_NONE; + config->rx_filter = HWTSTAMP_FILTER_NONE; } - return copy_to_user(req->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } /* Control hardware time stamping */ -static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req) +static int ravb_hwtstamp_set(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct ravb_private *priv = netdev_priv(ndev); - struct hwtstamp_config config; u32 tstamp_rx_ctrl = RAVB_RXTSTAMP_ENABLED; u32 tstamp_tx_ctrl; - if (copy_from_user(&config, req->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: tstamp_tx_ctrl = 0; break; @@ -2455,7 +2452,7 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: tstamp_rx_ctrl = 0; break; @@ -2463,36 +2460,14 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req) tstamp_rx_ctrl |= RAVB_RXTSTAMP_TYPE_V2_L2_EVENT; break; default: - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; tstamp_rx_ctrl |= RAVB_RXTSTAMP_TYPE_ALL; } priv->tstamp_tx_ctrl = tstamp_tx_ctrl; priv->tstamp_rx_ctrl = tstamp_rx_ctrl; - return copy_to_user(req->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; -} - -/* ioctl to device function */ -static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) -{ - struct phy_device *phydev = ndev->phydev; - - if (!netif_running(ndev)) - return -EINVAL; - - if (!phydev) - return -ENODEV; - - switch (cmd) { - case SIOCGHWTSTAMP: - return ravb_hwtstamp_get(ndev, req); - case SIOCSHWTSTAMP: - return ravb_hwtstamp_set(ndev, req); - } - - return phy_mii_ioctl(phydev, req, cmd); + return 0; } static int ravb_change_mtu(struct net_device *ndev, int new_mtu) @@ -2628,11 +2603,13 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_get_stats = ravb_get_stats, .ndo_set_rx_mode = ravb_set_rx_mode, .ndo_tx_timeout = ravb_tx_timeout, - .ndo_eth_ioctl = ravb_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = ravb_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_set_features = ravb_set_features, + .ndo_hwtstamp_get = ravb_hwtstamp_get, + .ndo_hwtstamp_set = ravb_hwtstamp_set, }; /* MDIO bus init function */ From 87e1b590f776ec9c9f66362a6f818088560c645f Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 23 Oct 2025 22:04:56 +0000 Subject: [PATCH 278/867] net: renesas: rswitch: convert to ndo_hwtstamp API Convert driver to use .ndo_hwtstamp_set()/.ndo_hwtstamp_get() callbacks. rswitch_eth_ioctl() becomes phy_do_ioctl_running(), remove it and replace .ndo_eth_ioctl callback with phy_do_ioctl_running(). 
Reviewed-by: Jacob Keller Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251023220457.3201122-6-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch_main.c | 53 ++++++++------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch_main.c b/drivers/net/ethernet/renesas/rswitch_main.c index 8d8acc2124b8f..f21a814aa9d11 100644 --- a/drivers/net/ethernet/renesas/rswitch_main.c +++ b/drivers/net/ethernet/renesas/rswitch_main.c @@ -1793,46 +1793,44 @@ static struct net_device_stats *rswitch_get_stats(struct net_device *ndev) return &ndev->stats; } -static int rswitch_hwstamp_get(struct net_device *ndev, struct ifreq *req) +static int rswitch_hwstamp_get(struct net_device *ndev, + struct kernel_hwtstamp_config *config) { struct rswitch_device *rdev = netdev_priv(ndev); struct rcar_gen4_ptp_private *ptp_priv; - struct hwtstamp_config config; ptp_priv = rdev->priv->ptp_priv; - config.flags = 0; - config.tx_type = ptp_priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : + config->flags = 0; + config->tx_type = ptp_priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; switch (ptp_priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { case RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT: - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; case RCAR_GEN4_RXTSTAMP_TYPE_ALL: - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: - config.rx_filter = HWTSTAMP_FILTER_NONE; + config->rx_filter = HWTSTAMP_FILTER_NONE; break; } - return copy_to_user(req->ifr_data, &config, sizeof(config)) ? 
-EFAULT : 0; + return 0; } -static int rswitch_hwstamp_set(struct net_device *ndev, struct ifreq *req) +static int rswitch_hwstamp_set(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct rswitch_device *rdev = netdev_priv(ndev); u32 tstamp_rx_ctrl = RCAR_GEN4_RXTSTAMP_ENABLED; - struct hwtstamp_config config; u32 tstamp_tx_ctrl; - if (copy_from_user(&config, req->ifr_data, sizeof(config))) - return -EFAULT; - - if (config.flags) + if (config->flags) return -EINVAL; - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: tstamp_tx_ctrl = 0; break; @@ -1843,7 +1841,7 @@ static int rswitch_hwstamp_set(struct net_device *ndev, struct ifreq *req) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: tstamp_rx_ctrl = 0; break; @@ -1851,7 +1849,7 @@ static int rswitch_hwstamp_set(struct net_device *ndev, struct ifreq *req) tstamp_rx_ctrl |= RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; break; default: - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; tstamp_rx_ctrl |= RCAR_GEN4_RXTSTAMP_TYPE_ALL; break; } @@ -1859,22 +1857,7 @@ static int rswitch_hwstamp_set(struct net_device *ndev, struct ifreq *req) rdev->priv->ptp_priv->tstamp_tx_ctrl = tstamp_tx_ctrl; rdev->priv->ptp_priv->tstamp_rx_ctrl = tstamp_rx_ctrl; - return copy_to_user(req->ifr_data, &config, sizeof(config)) ? 
-EFAULT : 0; -} - -static int rswitch_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) -{ - if (!netif_running(ndev)) - return -EINVAL; - - switch (cmd) { - case SIOCGHWTSTAMP: - return rswitch_hwstamp_get(ndev, req); - case SIOCSHWTSTAMP: - return rswitch_hwstamp_set(ndev, req); - default: - return phy_mii_ioctl(ndev->phydev, req, cmd); - } + return 0; } static int rswitch_get_port_parent_id(struct net_device *ndev, @@ -1905,11 +1888,13 @@ static const struct net_device_ops rswitch_netdev_ops = { .ndo_stop = rswitch_stop, .ndo_start_xmit = rswitch_start_xmit, .ndo_get_stats = rswitch_get_stats, - .ndo_eth_ioctl = rswitch_eth_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_get_port_parent_id = rswitch_get_port_parent_id, .ndo_get_phys_port_name = rswitch_get_phys_port_name, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_hwtstamp_get = rswitch_hwstamp_get, + .ndo_hwtstamp_set = rswitch_hwstamp_set, }; bool is_rdev(const struct net_device *ndev) From 329021eeae035f169822c7575da49561b0bd7138 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 23 Oct 2025 22:04:57 +0000 Subject: [PATCH 279/867] net: hns3: add hwtstamp_get/hwtstamp_set ops Add .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks to HNS3 framework to support HW timestamp configuration via netlink and adapt hns3pf to use .ndo_hwtstamp_get()/.ndo_hwtstamp_set() callbacks. 
Reviewed-by: Jacob Keller Reviewed-by: Jijie Shao Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251023220457.3201122-7-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 5 +++ .../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 ++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_main.c | 13 +++----- .../hisilicon/hns3/hns3pf/hclge_ptp.c | 32 +++++++++++-------- .../hisilicon/hns3/hns3pf/hclge_ptp.h | 9 ++++-- 5 files changed, 64 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3b548f71fa8a7..d7c3df1958f39 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -804,6 +804,11 @@ struct hnae3_ae_ops { int (*dbg_get_read_func)(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd, read_func *func); + int (*hwtstamp_get)(struct hnae3_handle *handle, + struct kernel_hwtstamp_config *config); + int (*hwtstamp_set)(struct hnae3_handle *handle, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index bfa5568baa926..7a0654e2d3dd9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2419,6 +2419,35 @@ static int hns3_nic_do_ioctl(struct net_device *netdev, return h->ae_algo->ops->do_ioctl(h, ifr, cmd); } +static int hns3_nic_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!netif_running(netdev)) + return -EINVAL; + + if (!h->ae_algo->ops->hwtstamp_get) + return -EOPNOTSUPP; + + return h->ae_algo->ops->hwtstamp_get(h, config); +} + +static int hns3_nic_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct 
netlink_ext_ack *extack) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!netif_running(netdev)) + return -EINVAL; + + if (!h->ae_algo->ops->hwtstamp_set) + return -EOPNOTSUPP; + + return h->ae_algo->ops->hwtstamp_set(h, config, extack); +} + static int hns3_nic_set_features(struct net_device *netdev, netdev_features_t features) { @@ -3048,6 +3077,8 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_set_vf_rate = hns3_nic_set_vf_rate, .ndo_set_vf_mac = hns3_nic_set_vf_mac, .ndo_select_queue = hns3_nic_select_queue, + .ndo_hwtstamp_get = hns3_nic_hwtstamp_get, + .ndo_hwtstamp_set = hns3_nic_hwtstamp_set, }; bool hns3_is_phys_func(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9d34d28ff168a..81d3bdc098e63 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9445,15 +9445,8 @@ static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - switch (cmd) { - case SIOCGHWTSTAMP: - return hclge_ptp_get_cfg(hdev, ifr); - case SIOCSHWTSTAMP: - return hclge_ptp_set_cfg(hdev, ifr); - default: - if (!hdev->hw.mac.phydev) - return hclge_mii_ioctl(hdev, ifr, cmd); - } + if (!hdev->hw.mac.phydev) + return hclge_mii_ioctl(hdev, ifr, cmd); return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd); } @@ -12901,6 +12894,8 @@ static const struct hnae3_ae_ops hclge_ops = { .get_dscp_prio = hclge_get_dscp_prio, .get_wol = hclge_get_wol, .set_wol = hclge_set_wol, + .hwtstamp_get = hclge_ptp_get_cfg, + .hwtstamp_set = hclge_ptp_set_cfg, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 4bd52eab39145..0081c52814550 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -204,13 +204,17 @@ static int hclge_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } -int hclge_ptp_get_cfg(struct hclge_dev *hdev, struct ifreq *ifr) +int hclge_ptp_get_cfg(struct hnae3_handle *handle, + struct kernel_hwtstamp_config *config) { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + if (!test_bit(HCLGE_STATE_PTP_EN, &hdev->state)) return -EOPNOTSUPP; - return copy_to_user(ifr->ifr_data, &hdev->ptp->ts_cfg, - sizeof(struct hwtstamp_config)) ? -EFAULT : 0; + *config = hdev->ptp->ts_cfg; + return 0; } static int hclge_ptp_int_en(struct hclge_dev *hdev, bool en) @@ -269,7 +273,7 @@ static int hclge_ptp_cfg(struct hclge_dev *hdev, u32 cfg) return ret; } -static int hclge_ptp_set_tx_mode(struct hwtstamp_config *cfg, +static int hclge_ptp_set_tx_mode(struct kernel_hwtstamp_config *cfg, unsigned long *flags, u32 *ptp_cfg) { switch (cfg->tx_type) { @@ -287,7 +291,7 @@ static int hclge_ptp_set_tx_mode(struct hwtstamp_config *cfg, return 0; } -static int hclge_ptp_set_rx_mode(struct hwtstamp_config *cfg, +static int hclge_ptp_set_rx_mode(struct kernel_hwtstamp_config *cfg, unsigned long *flags, u32 *ptp_cfg) { int rx_filter = cfg->rx_filter; @@ -332,7 +336,7 @@ static int hclge_ptp_set_rx_mode(struct hwtstamp_config *cfg, } static int hclge_ptp_set_ts_mode(struct hclge_dev *hdev, - struct hwtstamp_config *cfg) + struct kernel_hwtstamp_config *cfg) { unsigned long flags = hdev->ptp->flags; u32 ptp_cfg = 0; @@ -359,9 +363,12 @@ static int hclge_ptp_set_ts_mode(struct hclge_dev *hdev, return 0; } -int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr) +int hclge_ptp_set_cfg(struct hnae3_handle *handle, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config cfg; + struct hclge_vport *vport = hclge_get_vport(handle); + struct 
hclge_dev *hdev = vport->back; int ret; if (!test_bit(HCLGE_STATE_PTP_EN, &hdev->state)) { @@ -369,16 +376,13 @@ int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr) return -EOPNOTSUPP; } - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - ret = hclge_ptp_set_ts_mode(hdev, &cfg); + ret = hclge_ptp_set_ts_mode(hdev, config); if (ret) return ret; - hdev->ptp->ts_cfg = cfg; + hdev->ptp->ts_cfg = *config; - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } int hclge_ptp_get_ts_info(struct hnae3_handle *handle, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index 61faddcc3dd09..0162fa5ac1462 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -62,7 +62,7 @@ struct hclge_ptp { unsigned long flags; void __iomem *io_base; struct ptp_clock_info info; - struct hwtstamp_config ts_cfg; + struct kernel_hwtstamp_config ts_cfg; spinlock_t lock; /* protects ptp registers */ u32 ptp_cfg; u32 last_tx_seqid; @@ -133,8 +133,11 @@ bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb); void hclge_ptp_clean_tx_hwts(struct hclge_dev *hdev); void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb, u32 nsec, u32 sec); -int hclge_ptp_get_cfg(struct hclge_dev *hdev, struct ifreq *ifr); -int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr); +int hclge_ptp_get_cfg(struct hnae3_handle *handle, + struct kernel_hwtstamp_config *config); +int hclge_ptp_set_cfg(struct hnae3_handle *handle, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int hclge_ptp_init(struct hclge_dev *hdev); void hclge_ptp_uninit(struct hclge_dev *hdev); int hclge_ptp_get_ts_info(struct hnae3_handle *handle, From 622e8838a29845316668ec2e7648428878df7f9a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 
2025 23:16:50 +0000 Subject: [PATCH 280/867] sctp: Defer SCTP_DBG_OBJCNT_DEC() to sctp_destroy_sock(). SCTP_DBG_OBJCNT_INC() is called only when sctp_init_sock() returns 0 after successfully allocating sctp_sk(sk)->ep. OTOH, SCTP_DBG_OBJCNT_DEC() is called in sctp_close(). The code seems to expect that the socket is always exposed to userspace once SCTP_DBG_OBJCNT_INC() is incremented, but there is a path where the assumption is not true. In sctp_accept(), sctp_sock_migrate() could fail after sctp_init_sock(). Then, sk_common_release() does not call inet_release() nor sctp_close(). Instead, it calls sk->sk_prot->destroy(). Let's move SCTP_DBG_OBJCNT_DEC() from sctp_close() to sctp_destroy_sock(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ed8293a342402..d190e75e46454 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1553,8 +1553,6 @@ static void sctp_close(struct sock *sk, long timeout) spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); - - SCTP_DBG_OBJCNT_DEC(sock); } /* Handle EPIPE error. */ @@ -5109,9 +5107,12 @@ static void sctp_destroy_sock(struct sock *sk) sp->do_auto_asconf = 0; list_del(&sp->auto_asconf_list); } + sctp_endpoint_free(sp->ep); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + SCTP_DBG_OBJCNT_DEC(sock); } static void sctp_destruct_sock(struct sock *sk) From 2d4df59aae91340e777660cfe9862b7d8e15b077 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:51 +0000 Subject: [PATCH 281/867] sctp: Don't copy sk_sndbuf and sk_rcvbuf in sctp_sock_migrate(). sctp_sock_migrate() is called from 2 places. 
1) sctp_accept() calls sp->pf->create_accept_sk() before sctp_sock_migrate(), and sp->pf->create_accept_sk() calls sctp_copy_sock(). 2) sctp_do_peeloff() also calls sctp_copy_sock() before sctp_sock_migrate(). sctp_copy_sock() copies sk_sndbuf and sk_rcvbuf from the parent socket. Let's not copy the two fields in sctp_sock_migrate(). Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d190e75e46454..735b1222af955 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9523,12 +9523,9 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_bind_hashbucket *head; int err; - /* Migrate socket buffer sizes and all the socket level options to the - * new socket. + /* Migrate all the socket level options to the new socket. + * Brute force copy old sctp opt. */ - newsk->sk_sndbuf = oldsk->sk_sndbuf; - newsk->sk_rcvbuf = oldsk->sk_rcvbuf; - /* Brute force copy old sctp opt. */ sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure From b7185792f80a0069aa4eab87cb4263a1fb611a4e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:52 +0000 Subject: [PATCH 282/867] sctp: Don't call sk->sk_prot->init() in sctp_v[46]_create_accept_sk(). sctp_accept() calls sctp_v[46]_create_accept_sk() to allocate a new socket and calls sctp_sock_migrate() to copy fields from the parent socket to the new socket. sctp_v[46]_create_accept_sk() calls sctp_init_sock() to initialise sctp_sock, but most fields are overwritten by sctp_copy_descendant() called from sctp_sock_migrate(). Things done in sctp_init_sock() but not in sctp_sock_migrate() are the following: 1. Copy sk->sk_gso 2. Copy sk->sk_destruct (sctp_v6_init_sock()) 3. Allocate sctp_sock.ep 4. 
Initialise sctp_sock.pd_lobby 5. Count sk_sockets_allocated_inc(), sock_prot_inuse_add(), and SCTP_DBG_OBJCNT_INC() Let's do these in sctp_copy_sock() and sctp_sock_migrate() and avoid calling sk->sk_prot->init() in sctp_v[46]_create_accept_sk(). Note that sk->sk_destruct is already copied in sctp_copy_sock(). Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/sctp/ipv6.c | 8 +------- net/sctp/protocol.c | 8 +------- net/sctp/socket.c | 27 ++++++++++++++++++++++----- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index d725b21587588..c0762424a8544 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -789,7 +789,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) - goto out; + return NULL; sock_init_data(NULL, newsk); @@ -818,12 +818,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - if (newsk->sk_prot->init(newsk)) { - sk_common_release(newsk); - newsk = NULL; - } - -out: return newsk; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9dbc24af749b5..ad2722d1ec150 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -590,7 +590,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct inet_sock *newinet; if (!newsk) - goto out; + return NULL; sock_init_data(NULL, newsk); @@ -603,12 +603,6 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; - if (newsk->sk_prot->init(newsk)) { - sk_common_release(newsk); - newsk = NULL; - } - -out: return newsk; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 735b1222af955..70c75ac8da55d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4851,7 +4851,7 @@ static int 
sctp_disconnect(struct sock *sk, int flags) */ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) { - struct sctp_sock *sp; + struct sctp_sock *sp, *newsp; struct sctp_endpoint *ep; struct sock *newsk = NULL; struct sctp_association *asoc; @@ -4891,19 +4891,35 @@ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) goto out; } + newsp = sctp_sk(newsk); + newsp->ep = sctp_endpoint_new(newsk, GFP_KERNEL); + if (!newsp->ep) { + error = -ENOMEM; + goto out_release; + } + + skb_queue_head_init(&newsp->pd_lobby); + + sk_sockets_allocated_inc(newsk); + sock_prot_inuse_add(sock_net(sk), newsk->sk_prot, 1); + SCTP_DBG_OBJCNT_INC(sock); + /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. */ error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); - if (error) { - sk_common_release(newsk); - newsk = NULL; - } + if (error) + goto out_release; out: release_sock(sk); arg->err = error; return newsk; + +out_release: + sk_common_release(newsk); + newsk = NULL; + goto out; } /* The SCTP ioctl handler. */ @@ -9469,6 +9485,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_rcvtimeo = READ_ONCE(sk->sk_rcvtimeo); newsk->sk_sndtimeo = READ_ONCE(sk->sk_sndtimeo); newsk->sk_rxhash = sk->sk_rxhash; + newsk->sk_gso_type = sk->sk_gso_type; newinet = inet_sk(newsk); From 151b98d10ef7c3174465e409b99d8762e7e8de60 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:53 +0000 Subject: [PATCH 283/867] net: Add sk_clone(). sctp_accept() will use sk_clone_lock(), but it will be called with the parent socket locked, and sctp_migrate() acquires the child lock later. Let's add no lock version of sk_clone_lock(). Note that lockdep complains if we simply use bh_lock_sock_nested(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 ++++++- net/core/sock.c | 24 ++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 01ce231603db0..c7e58b8e8a907 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1822,7 +1822,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, void sk_free(struct sock *sk); void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); -struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); +struct sock *sk_clone(const struct sock *sk, const gfp_t priority, bool lock); + +static inline struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) +{ + return sk_clone(sk, priority, true); +} struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); diff --git a/net/core/sock.c b/net/core/sock.c index a99132cc09656..7a9bbc2afcf08 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2462,13 +2462,16 @@ static void sk_init_common(struct sock *sk) } /** - * sk_clone_lock - clone a socket, and lock its clone - * @sk: the socket to clone - * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * sk_clone - clone a socket + * @sk: the socket to clone + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * @lock: if true, lock the cloned sk * - * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) + * If @lock is true, the clone is locked by bh_lock_sock(), and + * caller must unlock socket even in error path by bh_unlock_sock(). 
*/ -struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) +struct sock *sk_clone(const struct sock *sk, const gfp_t priority, + bool lock) { struct proto *prot = READ_ONCE(sk->sk_prot); struct sk_filter *filter; @@ -2497,9 +2500,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, false, priority); } + sk_node_init(&newsk->sk_node); sock_lock_init(newsk); - bh_lock_sock(newsk); + + if (lock) + bh_lock_sock(newsk); + newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; newsk->sk_backlog.len = 0; @@ -2590,12 +2597,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; - bh_unlock_sock(newsk); + if (lock) + bh_unlock_sock(newsk); sk_free(newsk); newsk = NULL; goto out; } -EXPORT_SYMBOL_GPL(sk_clone_lock); +EXPORT_SYMBOL_GPL(sk_clone); static u32 sk_dst_gso_max_size(struct sock *sk, const struct net_device *dev) { From 16942cf4d3e31b6246b7d000dd823f7b0b38bf8c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:54 +0000 Subject: [PATCH 284/867] sctp: Use sk_clone() in sctp_accept(). sctp_accept() calls sctp_v[46]_create_accept_sk() to allocate a new socket and calls sctp_sock_migrate() to copy fields from the parent socket to the new socket. sctp_v4_create_accept_sk() allocates sk by sk_alloc(), initialises it by sock_init_data(), and copies a bunch of fields from the parent socket by sctp_copy_sock(). sctp_sock_migrate() calls sctp_copy_descendant() to copy most fields in sctp_sock from the parent socket by memcpy(). These can be simply replaced by sk_clone(). Let's consolidate sctp_v[46]_create_accept_sk() to sctp_clone_sock() with sk_clone(). We will reuse sctp_clone_sock() for sctp_do_peeloff() and then remove sctp_copy_descendant(). 
Note that sock_reset_flag(newsk, SOCK_ZAPPED) is not copied to sctp_clone_sock() as sctp does not use SOCK_ZAPPED at all. Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 4 +- net/sctp/socket.c | 113 ++++++++++++++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 40 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e8771faa5bbfd..77f6ae0fc231d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -755,9 +755,7 @@ EXPORT_SYMBOL(inet_stream_connect); void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk) { - /* TODO: use sk_clone_lock() in SCTP and remove protocol checks */ - if (mem_cgroup_sockets_enabled && - (!IS_ENABLED(CONFIG_IP_SCTP) || sk_is_tcp(newsk))) { + if (mem_cgroup_sockets_enabled) { gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL; mem_cgroup_sk_alloc(newsk); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 70c75ac8da55d..826f17747f176 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4842,6 +4842,74 @@ static int sctp_disconnect(struct sock *sk, int flags) return 0; } +static struct sock *sctp_clone_sock(struct sock *sk, + struct sctp_association *asoc, + enum sctp_socket_type type) +{ + struct sock *newsk = sk_clone(sk, GFP_KERNEL, false); + struct inet_sock *newinet; + struct sctp_sock *newsp; + int err = -ENOMEM; + + if (!newsk) + return ERR_PTR(err); + + /* sk_clone() sets refcnt to 2 */ + sock_put(newsk); + + newinet = inet_sk(newsk); + newsp = sctp_sk(newsk); + + newsp->pf->to_sk_daddr(&asoc->peer.primary_addr, newsk); + newinet->inet_dport = htons(asoc->peer.port); + + newsp->pf->copy_ip_options(sk, newsk); + atomic_set(&newinet->inet_id, get_random_u16()); + + inet_set_bit(MC_LOOP, newsk); + newinet->mc_ttl = 1; + newinet->mc_index = 0; + newinet->mc_list = NULL; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + 
struct ipv6_pinfo *newnp = inet6_sk(newsk); + + newinet->pinet6 = &((struct sctp6_sock *)newsk)->inet6; + newinet->ipv6_fl_list = NULL; + + memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo)); + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + } +#endif + + skb_queue_head_init(&newsp->pd_lobby); + + newsp->ep = sctp_endpoint_new(newsk, GFP_KERNEL); + if (!newsp->ep) + goto out_release; + + SCTP_DBG_OBJCNT_INC(sock); + sk_sockets_allocated_inc(newsk); + sock_prot_inuse_add(sock_net(sk), newsk->sk_prot, 1); + + err = sctp_sock_migrate(sk, newsk, asoc, type); + if (err) + goto out_release; + + /* Set newsk security attributes from original sk and connection + * security attribute from asoc. + */ + security_sctp_sk_clone(asoc, sk, newsk); + + return newsk; + +out_release: + sk_common_release(newsk); + return ERR_PTR(err); +} + /* 4.1.4 accept() - TCP Style Syntax * * Applications use accept() call to remove an established SCTP @@ -4851,18 +4919,13 @@ static int sctp_disconnect(struct sock *sk, int flags) */ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) { - struct sctp_sock *sp, *newsp; - struct sctp_endpoint *ep; - struct sock *newsk = NULL; struct sctp_association *asoc; - long timeo; + struct sock *newsk = NULL; int error = 0; + long timeo; lock_sock(sk); - sp = sctp_sk(sk); - ep = sp->ep; - if (!sctp_style(sk, TCP)) { error = -EOPNOTSUPP; goto out; @@ -4883,43 +4946,19 @@ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) /* We treat the list of associations on the endpoint as the accept * queue and pick the first association on the list. 
*/ - asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - - newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern); - if (!newsk) { - error = -ENOMEM; - goto out; - } + asoc = list_entry(sctp_sk(sk)->ep->asocs.next, + struct sctp_association, asocs); - newsp = sctp_sk(newsk); - newsp->ep = sctp_endpoint_new(newsk, GFP_KERNEL); - if (!newsp->ep) { - error = -ENOMEM; - goto out_release; + newsk = sctp_clone_sock(sk, asoc, SCTP_SOCKET_TCP); + if (IS_ERR(newsk)) { + error = PTR_ERR(newsk); + newsk = NULL; } - skb_queue_head_init(&newsp->pd_lobby); - - sk_sockets_allocated_inc(newsk); - sock_prot_inuse_add(sock_net(sk), newsk->sk_prot, 1); - SCTP_DBG_OBJCNT_INC(sock); - - /* Populate the fields of the newsk from the oldsk and migrate the - * asoc to the newsk. - */ - error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); - if (error) - goto out_release; - out: release_sock(sk); arg->err = error; return newsk; - -out_release: - sk_common_release(newsk); - newsk = NULL; - goto out; } /* The SCTP ioctl handler. */ From c49ed521f1772ca9203d22a1e5950f337fd5f930 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:55 +0000 Subject: [PATCH 285/867] sctp: Remove sctp_pf.create_accept_sk(). sctp_v[46]_create_accept_sk() are no longer used. Let's remove sctp_pf.create_accept_sk(). 
Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sctp/structs.h | 3 --- net/sctp/ipv6.c | 45 -------------------------------------- net/sctp/protocol.c | 27 ----------------------- 3 files changed, 75 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2ae390219efdd..3dd304e411d02 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -497,9 +497,6 @@ struct sctp_pf { int (*bind_verify) (struct sctp_sock *, union sctp_addr *); int (*send_verify) (struct sctp_sock *, union sctp_addr *); int (*supported_addrs)(const struct sctp_sock *, __be16 *); - struct sock *(*create_accept_sk) (struct sock *sk, - struct sctp_association *asoc, - bool kern); int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c0762424a8544..069b7e45d8bda 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -777,50 +777,6 @@ static enum sctp_scope sctp_v6_scope(union sctp_addr *addr) return retval; } -/* Create and initialize a new sk for the socket to be returned by accept(). 
*/ -static struct sock *sctp_v6_create_accept_sk(struct sock *sk, - struct sctp_association *asoc, - bool kern) -{ - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); - struct sctp6_sock *newsctp6sk; - struct inet_sock *newinet; - struct sock *newsk; - - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); - if (!newsk) - return NULL; - - sock_init_data(NULL, newsk); - - sctp_copy_sock(newsk, sk, asoc); - sock_reset_flag(sk, SOCK_ZAPPED); - - newsctp6sk = (struct sctp6_sock *)newsk; - newinet = inet_sk(newsk); - newinet->pinet6 = &newsctp6sk->inet6; - newinet->ipv6_fl_list = NULL; - - sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; - - newnp = inet6_sk(newsk); - - memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->ipv6_mc_list = NULL; - newnp->ipv6_ac_list = NULL; - - sctp_v6_copy_ip_options(sk, newsk); - - /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() - * and getpeername(). - */ - sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); - - newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - - return newsk; -} - /* Format a sockaddr for return to user space. This makes sure the return is * AF_INET or AF_INET6 depending on the SCTP_I_WANT_MAPPED_V4_ADDR option. */ @@ -1167,7 +1123,6 @@ static struct sctp_pf sctp_pf_inet6 = { .bind_verify = sctp_inet6_bind_verify, .send_verify = sctp_inet6_send_verify, .supported_addrs = sctp_inet6_supported_addrs, - .create_accept_sk = sctp_v6_create_accept_sk, .addr_to_user = sctp_v6_addr_to_user, .to_sk_saddr = sctp_v6_to_sk_saddr, .to_sk_daddr = sctp_v6_to_sk_daddr, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ad2722d1ec150..2c3398f75d766 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -580,32 +580,6 @@ static int sctp_v4_is_ce(const struct sk_buff *skb) return INET_ECN_is_ce(ip_hdr(skb)->tos); } -/* Create and initialize a new sk for the socket returned by accept(). 
*/ -static struct sock *sctp_v4_create_accept_sk(struct sock *sk, - struct sctp_association *asoc, - bool kern) -{ - struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot, kern); - struct inet_sock *newinet; - - if (!newsk) - return NULL; - - sock_init_data(NULL, newsk); - - sctp_copy_sock(newsk, sk, asoc); - sock_reset_flag(newsk, SOCK_ZAPPED); - - sctp_v4_copy_ip_options(sk, newsk); - - newinet = inet_sk(newsk); - - newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; - - return newsk; -} - static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ @@ -1113,7 +1087,6 @@ static struct sctp_pf sctp_pf_inet = { .bind_verify = sctp_inet_bind_verify, .send_verify = sctp_inet_send_verify, .supported_addrs = sctp_inet_supported_addrs, - .create_accept_sk = sctp_v4_create_accept_sk, .addr_to_user = sctp_v4_addr_to_user, .to_sk_saddr = sctp_v4_to_sk_saddr, .to_sk_daddr = sctp_v4_to_sk_daddr, From b7ddb55f31279f4e59acde3395fc03c3d94b6e5f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:56 +0000 Subject: [PATCH 286/867] sctp: Use sctp_clone_sock() in sctp_do_peeloff(). sctp_do_peeloff() calls sock_create() to allocate and initialise struct sock, inet_sock, and sctp_sock, but later sctp_copy_sock() and sctp_sock_migrate() overwrite most fields. What sctp_do_peeloff() does is more like accept(). Let's use sock_create_lite() and sctp_clone_sock(). 
Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 1 + net/sctp/socket.c | 36 +++++++++++++++--------------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 77f6ae0fc231d..0784e2a873a15 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -788,6 +788,7 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new newsock->state = SS_CONNECTED; } +EXPORT_SYMBOL_GPL(__inet_accept); /* * Accept a pending connection. The TCP layer now gives BSD semantics. diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 826f17747f176..60d3e340dfeda 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5671,11 +5671,11 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv /* Helper routine to branch off an association to a new socket. */ static int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, - struct socket **sockp) + struct socket **sockp) { struct sctp_association *asoc = sctp_id2assoc(sk, id); - struct sctp_sock *sp = sctp_sk(sk); struct socket *sock; + struct sock *newsk; int err = 0; /* Do not peel off from one netns to another one. */ @@ -5691,30 +5691,24 @@ static int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, if (!sctp_style(sk, UDP)) return -EINVAL; - /* Create a new socket. */ - err = sock_create(sk->sk_family, SOCK_SEQPACKET, IPPROTO_SCTP, &sock); - if (err < 0) + err = sock_create_lite(sk->sk_family, SOCK_SEQPACKET, IPPROTO_SCTP, &sock); + if (err) return err; - sctp_copy_sock(sock->sk, sk, asoc); - - /* Make peeled-off sockets more like 1-1 accepted sockets. - * Set the daddr and initialize id to something more random and also - * copy over any ip options. 
- */ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); - sp->pf->copy_ip_options(sk, sock->sk); - - /* Populate the fields of the newsk from the oldsk and migrate the - * asoc to the newsk. - */ - err = sctp_sock_migrate(sk, sock->sk, asoc, - SCTP_SOCKET_UDP_HIGH_BANDWIDTH); - if (err) { + newsk = sctp_clone_sock(sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + if (IS_ERR(newsk)) { sock_release(sock); - sock = NULL; + *sockp = NULL; + return PTR_ERR(newsk); } + lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); + __inet_accept(sk->sk_socket, sock, newsk); + release_sock(newsk); + + sock->ops = sk->sk_socket->ops; + __module_get(sock->ops->owner); + *sockp = sock; return err; From 71068e2e1b6bd78f5599e5bc89e125a75149884b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Oct 2025 23:16:57 +0000 Subject: [PATCH 287/867] sctp: Remove sctp_copy_sock() and sctp_copy_descendant(). Now, sctp_accept() and sctp_do_peeloff() use sk_clone(), and we no longer need sctp_copy_sock() and sctp_copy_descendant(). Let's remove them. 
Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251023231751.4168390-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 8 ----- include/net/sctp/sctp.h | 3 +- net/sctp/socket.c | 71 ----------------------------------------- 3 files changed, 1 insertion(+), 81 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b6ec08072533a..ac1c75975908a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -355,14 +355,6 @@ static inline struct sock *skb_to_full_sk(const struct sk_buff *skb) #define inet_sk(ptr) container_of_const(ptr, struct inet_sock, sk) -static inline void __inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from, - const int ancestor_size) -{ - memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, - sk_from->sk_prot->obj_size - ancestor_size); -} - int inet_sk_rebuild_header(struct sock *sk); /** diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e96d1bd087f62..bb4b80c12541a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -94,8 +94,7 @@ void sctp_data_ready(struct sock *sk); __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); -void sctp_copy_sock(struct sock *newsk, struct sock *sk, - struct sctp_association *asoc); + extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 60d3e340dfeda..ac737e60829b9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9491,72 +9491,6 @@ static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) sctp_skb_set_owner_r(skb, sk); } -void sctp_copy_sock(struct sock *newsk, struct sock *sk, - struct sctp_association *asoc) -{ - struct inet_sock *inet = inet_sk(sk); - struct inet_sock 
*newinet; - struct sctp_sock *sp = sctp_sk(sk); - - newsk->sk_type = sk->sk_type; - newsk->sk_bound_dev_if = sk->sk_bound_dev_if; - newsk->sk_flags = sk->sk_flags; - newsk->sk_tsflags = sk->sk_tsflags; - newsk->sk_no_check_tx = sk->sk_no_check_tx; - newsk->sk_no_check_rx = sk->sk_no_check_rx; - newsk->sk_reuse = sk->sk_reuse; - sctp_sk(newsk)->reuse = sp->reuse; - - newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sk->sk_destruct; - newsk->sk_family = sk->sk_family; - newsk->sk_protocol = IPPROTO_SCTP; - newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; - newsk->sk_sndbuf = sk->sk_sndbuf; - newsk->sk_rcvbuf = sk->sk_rcvbuf; - newsk->sk_lingertime = sk->sk_lingertime; - newsk->sk_rcvtimeo = READ_ONCE(sk->sk_rcvtimeo); - newsk->sk_sndtimeo = READ_ONCE(sk->sk_sndtimeo); - newsk->sk_rxhash = sk->sk_rxhash; - newsk->sk_gso_type = sk->sk_gso_type; - - newinet = inet_sk(newsk); - - /* Initialize sk's sport, dport, rcv_saddr and daddr for - * getsockname() and getpeername() - */ - newinet->inet_sport = inet->inet_sport; - newinet->inet_saddr = inet->inet_saddr; - newinet->inet_rcv_saddr = inet->inet_rcv_saddr; - newinet->inet_dport = htons(asoc->peer.port); - newinet->pmtudisc = inet->pmtudisc; - atomic_set(&newinet->inet_id, get_random_u16()); - - newinet->uc_ttl = inet->uc_ttl; - inet_set_bit(MC_LOOP, newsk); - newinet->mc_ttl = 1; - newinet->mc_index = 0; - newinet->mc_list = NULL; - - if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) - net_enable_timestamp(); - - /* Set newsk security attributes from original sk and connection - * security attribute from asoc. 
- */ - security_sctp_sk_clone(asoc, sk, newsk); -} - -static inline void sctp_copy_descendant(struct sock *sk_to, - const struct sock *sk_from) -{ - size_t ancestor_size = sizeof(struct inet_sock); - - ancestor_size += sk_from->sk_prot->obj_size; - ancestor_size -= offsetof(struct sctp_sock, pd_lobby); - __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); -} - /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ @@ -9573,11 +9507,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_bind_hashbucket *head; int err; - /* Migrate all the socket level options to the new socket. - * Brute force copy old sctp opt. - */ - sctp_copy_descendant(newsk, oldsk); - /* Restore the ep value that was overwritten with the above structure * copy. */ From 6f147c8328e045de3a35155ca7c883d88da9e916 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 24 Oct 2025 10:51:45 +0800 Subject: [PATCH 288/867] net/sched: Remove unused typedef psched_tdiff_t Since commit 051d44209842 ("net/sched: Retire CBQ qdisc") this is not used anymore. 
Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20251024025145.4069583-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index c660ac8710831..4678db45832a1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -43,7 +43,6 @@ struct qdisc_walker { */ typedef u64 psched_time_t; -typedef long psched_tdiff_t; /* Avoid doing 64 bit divide */ #define PSCHED_SHIFT 6 From 0ae1ac7335ca2328c42fbbe2b71bc9c3fc8e65d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Oct 2025 12:07:07 +0000 Subject: [PATCH 289/867] tcp: remove one ktime_get() from recvmsg() fast path Each time some payload is consumed by user space (recvmsg() and friends), TCP calls tcp_rcv_space_adjust() to run DRS algorithm to check if an increase of sk->sk_rcvbuf is needed. This function is based on time sampling, and currently calls tcp_mstamp_refresh(tp), which is a wrapper around ktime_get_ns(). ktime_get_ns() has a high cost on some platforms. 100+ cycles for rdtscp on AMD EPYC Turin for instance. We do not have to refresh tp->tcp_mstamp, using the last cached value is enough. We only need to refresh it from __tcp_cleanup_rbuf() if an ACK must be sent (this is a rare event). 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251024120707.3516550-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_input.c | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b79da6d393927..a9345aa5a2e5f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1556,8 +1556,10 @@ void __tcp_cleanup_rbuf(struct sock *sk, int copied) time_to_ack = true; } } - if (time_to_ack) + if (time_to_ack) { + tcp_mstamp_refresh(tp); tcp_send_ack(sk); + } } void tcp_cleanup_rbuf(struct sock *sk, int copied) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8fc97f4d8a6b2..ff19f6e54d55c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -928,9 +928,15 @@ void tcp_rcv_space_adjust(struct sock *sk) trace_tcp_rcv_space_adjust(sk); - tcp_mstamp_refresh(tp); + if (unlikely(!tp->rcv_rtt_est.rtt_us)) + return; + + /* We do not refresh tp->tcp_mstamp here. + * Some platforms have expensive ktime_get() implementations. + * Using the last cached value is enough for DRS. + */ time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); - if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) + if (time < (tp->rcv_rtt_est.rtt_us >> 3)) return; /* Number of bytes copied to user in last RTT */ From 19ab0a22efbd824f342a794a5c61bce7842daef5 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Sat, 25 Oct 2025 07:48:50 +0100 Subject: [PATCH 290/867] dt-bindings: net: phy: vsc8531: Convert to DT schema Convert VSC8531 Gigabit ethernet phy binding to DT schema format. While at it add compatible string for VSC8541 PHY which is very much similar to the VSC8531 PHY and is already supported in the kernel. VSC8541 PHY is present on the Renesas RZ/T2H EVK. 
Signed-off-by: Lad Prabhakar Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251025064850.393797-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- .../bindings/net/mscc-phy-vsc8531.txt | 73 ---------- .../bindings/net/mscc-phy-vsc8531.yaml | 131 ++++++++++++++++++ .../devicetree/bindings/vendor-prefixes.yaml | 2 +- 3 files changed, 132 insertions(+), 74 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt create mode 100644 Documentation/devicetree/bindings/net/mscc-phy-vsc8531.yaml diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt deleted file mode 100644 index 0a3647fe331b6..0000000000000 --- a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt +++ /dev/null @@ -1,73 +0,0 @@ -* Microsemi - vsc8531 Giga bit ethernet phy - -Optional properties: -- vsc8531,vddmac : The vddmac in mV. Allowed values is listed - in the first row of Table 1 (below). - This property is only used in combination - with the 'edge-slowdown' property. - Default value is 3300. -- vsc8531,edge-slowdown : % the edge should be slowed down relative to - the fastest possible edge time. - Edge rate sets the drive strength of the MAC - interface output signals. Changing the - drive strength will affect the edge rate of - the output signal. The goal of this setting - is to help reduce electrical emission (EMI) - by being able to reprogram drive strength - and in effect slow down the edge rate if - desired. - To adjust the edge-slowdown, the 'vddmac' - must be specified. Table 1 lists the - supported edge-slowdown values for a given - 'vddmac'. - Default value is 0%. - Ref: Table:1 - Edge rate change (below). -- vsc8531,led-[N]-mode : LED mode. Specify how the LED[N] should behave. - N depends on the number of LEDs supported by a - PHY. - Allowed values are defined in - "include/dt-bindings/net/mscc-phy-vsc8531.h". 
- Default values are VSC8531_LINK_1000_ACTIVITY (1), - VSC8531_LINK_100_ACTIVITY (2), - VSC8531_LINK_ACTIVITY (0) and - VSC8531_DUPLEX_COLLISION (8). -- load-save-gpios : GPIO used for the load/save operation of the PTP - hardware clock (PHC). - - -Table: 1 - Edge rate change -----------------------------------------------------------------| -| Edge Rate Change (VDDMAC) | -| | -| 3300 mV 2500 mV 1800 mV 1500 mV | -|---------------------------------------------------------------| -| 0% 0% 0% 0% | -| (Fastest) (recommended) (recommended) | -|---------------------------------------------------------------| -| 2% 3% 5% 6% | -|---------------------------------------------------------------| -| 4% 6% 9% 14% | -|---------------------------------------------------------------| -| 7% 10% 16% 21% | -|(recommended) (recommended) | -|---------------------------------------------------------------| -| 10% 14% 23% 29% | -|---------------------------------------------------------------| -| 17% 23% 35% 42% | -|---------------------------------------------------------------| -| 29% 37% 52% 58% | -|---------------------------------------------------------------| -| 53% 63% 76% 77% | -| (slowest) | -|---------------------------------------------------------------| - -Example: - - vsc8531_0: ethernet-phy@0 { - compatible = "ethernet-phy-id0007.0570"; - vsc8531,vddmac = <3300>; - vsc8531,edge-slowdown = <7>; - vsc8531,led-0-mode = ; - vsc8531,led-1-mode = ; - load-save-gpios = <&gpio 10 GPIO_ACTIVE_HIGH>; - }; diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.yaml b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.yaml new file mode 100644 index 0000000000000..0afbd0ff126f5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.yaml @@ -0,0 +1,131 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/mscc-phy-vsc8531.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + 
+title: Microsemi VSC8531 Gigabit Ethernet PHY + +maintainers: + - Lad Prabhakar + +description: + The VSC8531 is a Gigabit Ethernet PHY with configurable MAC interface + drive strength and LED modes. + +allOf: + - $ref: ethernet-phy.yaml# + +select: + properties: + compatible: + contains: + enum: + - ethernet-phy-id0007.0570 # VSC8531 + - ethernet-phy-id0007.0772 # VSC8541 + required: + - compatible + +properties: + compatible: + items: + - enum: + - ethernet-phy-id0007.0570 # VSC8531 + - ethernet-phy-id0007.0772 # VSC8541 + - const: ethernet-phy-ieee802.3-c22 + + vsc8531,vddmac: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The VDDMAC voltage in millivolts. This property is used in combination + with the edge-slowdown property to control the drive strength of the + MAC interface output signals. + enum: [3300, 2500, 1800, 1500] + default: 3300 + + vsc8531,edge-slowdown: + $ref: /schemas/types.yaml#/definitions/uint32 + description: > + Percentage by which the edge rate should be slowed down relative to + the fastest possible edge time. This setting helps reduce electromagnetic + interference (EMI) by adjusting the drive strength of the MAC interface + output signals. Valid values depend on the vddmac voltage setting + according to the edge rate change table in the datasheet. + + - When vsc8531,vddmac = 3300 mV: allowed values are 0, 2, 4, 7, 10, 17, 29, and 53. + (Recommended: 7) + - When vsc8531,vddmac = 2500 mV: allowed values are 0, 3, 6, 10, 14, 23, 37, and 63. + (Recommended: 10) + - When vsc8531,vddmac = 1800 mV: allowed values are 0, 5, 9, 16, 23, 35, 52, and 76. + (Recommended: 0) + - When vsc8531,vddmac = 1500 mV: allowed values are 0, 6, 14, 21, 29, 42, 58, and 77. + (Recommended: 0) + enum: [0, 2, 3, 4, 5, 6, 7, 9, 10, 14, 16, 17, 21, 23, 29, 35, 37, 42, 52, 53, 58, 63, 76, 77] + default: 0 + + vsc8531,led-0-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: LED[0] behavior mode. 
See include/dt-bindings/net/mscc-phy-vsc8531.h + for available modes. + minimum: 0 + maximum: 15 + default: 1 + + vsc8531,led-1-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: LED[1] behavior mode. See include/dt-bindings/net/mscc-phy-vsc8531.h + for available modes. + minimum: 0 + maximum: 15 + default: 2 + + vsc8531,led-2-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: LED[2] behavior mode. See include/dt-bindings/net/mscc-phy-vsc8531.h + for available modes. + minimum: 0 + maximum: 15 + default: 0 + + vsc8531,led-3-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: LED[3] behavior mode. See include/dt-bindings/net/mscc-phy-vsc8531.h + for available modes. + minimum: 0 + maximum: 15 + default: 8 + + load-save-gpios: + description: GPIO phandle used for the load/save operation of the PTP hardware + clock (PHC). + maxItems: 1 + +dependencies: + vsc8531,edge-slowdown: + - vsc8531,vddmac + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + #include + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0772", "ethernet-phy-ieee802.3-c22"; + reg = <0>; + vsc8531,vddmac = <3300>; + vsc8531,edge-slowdown = <7>; + vsc8531,led-0-mode = ; + vsc8531,led-1-mode = ; + load-save-gpios = <&gpio 10 GPIO_ACTIVE_HIGH>; + }; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index f1d1882009ba9..424aa7b911a77 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -20,7 +20,7 @@ patternProperties: "^(keypad|m25p|max8952|max8997|max8998|mpmc),.*": true "^(pciclass|pinctrl-single|#pinctrl-single|PowerPC),.*": true "^(pl022|pxa-mmc|rcar_sound|rotary-encoder|s5m8767|sdhci),.*": true - "^(simple-audio-card|st-plgpio|st-spics|ts),.*": true + 
"^(simple-audio-card|st-plgpio|st-spics|ts|vsc8531),.*": true "^pool[0-3],.*": true # Keep list in alphabetical order. From a71e367773482a78566abd862dfee9cc3bb9332e Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Thu, 23 Oct 2025 09:45:36 +0800 Subject: [PATCH 291/867] net: txgbe: support RX desc merge mode RX descriptor merge mode is supported on AML devices. When it is enabled, the hardware process the RX descriptors in batches. Signed-off-by: Jiawen Wu Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251023014538.12644-2-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 10 ++++++++++ drivers/net/ethernet/wangxun/libwx/wx_type.h | 7 +++++++ drivers/net/ethernet/wangxun/libwx/wx_vf.h | 1 + drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c | 3 +++ drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 1 + drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c | 11 +++++++++++ 6 files changed, 33 insertions(+) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 1e2713f0c9212..2dbbb42aa9c01 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1935,6 +1935,10 @@ static void wx_configure_rx_ring(struct wx *wx, rxdctl |= (ring->count / 128) << WX_PX_RR_CFG_RR_SIZE_SHIFT; rxdctl |= 0x1 << WX_PX_RR_CFG_RR_THER_SHIFT; + + if (test_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags)) + rxdctl |= WX_PX_RR_CFG_DESC_MERGE; + wr32(wx, WX_PX_RR_CFG(reg_idx), rxdctl); /* reset head and tail pointers */ @@ -2190,6 +2194,12 @@ void wx_configure_rx(struct wx *wx) /* set_rx_buffer_len must be called before ring initialization */ wx_set_rx_buffer_len(wx); + if (test_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags)) { + wr32(wx, WX_RDM_DCACHE_CTL, WX_RDM_DCACHE_CTL_EN); + wr32m(wx, WX_RDM_RSC_CTL, + WX_RDM_RSC_CTL_FREE_CTL | WX_RDM_RSC_CTL_FREE_CNT_DIS, + WX_RDM_RSC_CTL_FREE_CTL); + } /* Setup the HW Rx Head and Tail Descriptor Pointers and * the 
Base and Length of the Rx Descriptor Ring */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 4880268b620e1..eb3f32551c14b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -83,8 +83,13 @@ /*********************** Receive DMA registers **************************/ #define WX_RDM_VF_RE(_i) (0x12004 + ((_i) * 4)) +#define WX_RDM_RSC_CTL 0x1200C +#define WX_RDM_RSC_CTL_FREE_CNT_DIS BIT(8) +#define WX_RDM_RSC_CTL_FREE_CTL BIT(7) #define WX_RDM_PF_QDE(_i) (0x12080 + ((_i) * 4)) #define WX_RDM_VFRE_CLR(_i) (0x120A0 + ((_i) * 4)) +#define WX_RDM_DCACHE_CTL 0x120A8 +#define WX_RDM_DCACHE_CTL_EN BIT(0) #define WX_RDM_DRP_PKT 0x12500 #define WX_RDM_PKT_CNT 0x12504 #define WX_RDM_BYTE_CNT_LSB 0x12508 @@ -447,6 +452,7 @@ enum WX_MSCA_CMD_value { #define WX_PX_RR_CFG_VLAN BIT(31) #define WX_PX_RR_CFG_DROP_EN BIT(30) #define WX_PX_RR_CFG_SPLIT_MODE BIT(26) +#define WX_PX_RR_CFG_DESC_MERGE BIT(19) #define WX_PX_RR_CFG_RR_THER_SHIFT 16 #define WX_PX_RR_CFG_RR_HDR_SZ GENMASK(15, 12) #define WX_PX_RR_CFG_RR_BUF_SZ GENMASK(11, 8) @@ -1232,6 +1238,7 @@ enum wx_pf_flags { WX_FLAG_NEED_SFP_RESET, WX_FLAG_NEED_UPDATE_LINK, WX_FLAG_NEED_DO_RESET, + WX_FLAG_RX_MERGE_ENABLED, WX_PF_FLAGS_NBITS /* must be last */ }; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.h b/drivers/net/ethernet/wangxun/libwx/wx_vf.h index 3f16de0fa4272..ecb1985923936 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.h @@ -74,6 +74,7 @@ #define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f) #define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12) #define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f) +#define WX_VXRXDCTL_DESC_MERGE BIT(19) #define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23) #define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f) #define WX_VXRXDCTL_RSCEN BIT(29) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c 
b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index a87887b9f8ee3..f54107f3c6d7f 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -272,6 +272,9 @@ void wx_configure_rx_ring_vf(struct wx *wx, struct wx_ring *ring) rxdctl |= WX_VXRXDCTL_RSCMAX(0); rxdctl |= WX_VXRXDCTL_RSCEN; + if (test_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags)) + rxdctl |= WX_VXRXDCTL_DESC_MERGE; + wr32(wx, WX_VXRXDCTL(reg_idx), rxdctl); /* pf/vf reuse */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index c4c4d70d8466a..60a04c5a76782 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -423,6 +423,7 @@ static int txgbe_sw_init(struct wx *wx) break; case wx_mac_aml: case wx_mac_aml40: + set_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags); set_bit(WX_FLAG_SWFW_RING, wx->flags); wx->swfw_index = 0; break; diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c index 72663e3c4205a..52c1e223bbd78 100644 --- a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c +++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c @@ -157,6 +157,17 @@ static int txgbevf_sw_init(struct wx *wx) wx->set_num_queues = txgbevf_set_num_queues; + switch (wx->mac.type) { + case wx_mac_sp: + break; + case wx_mac_aml: + case wx_mac_aml40: + set_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags); + break; + default: + break; + } + return 0; err_reset_hw: kfree(wx->vfinfo); From eb57b16d90d3ec9eee17b0e66026507cd09a2734 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Thu, 23 Oct 2025 09:45:37 +0800 Subject: [PATCH 292/867] net: txgbe: support TX head write-back mode TX head write-back mode is supported on AML devices. When it is enabled, the hardware no longer writes the descriptors' DD one by one, but writes back the pointer of the completion descriptor to the head_wb address.
Signed-off-by: Jiawen Wu Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251023014538.12644-3-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 9 ++++ drivers/net/ethernet/wangxun/libwx/wx_lib.c | 53 ++++++++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 7 +++ drivers/net/ethernet/wangxun/libwx/wx_vf.h | 3 ++ .../net/ethernet/wangxun/libwx/wx_vf_lib.c | 9 ++++ .../net/ethernet/wangxun/txgbe/txgbe_main.c | 1 + .../ethernet/wangxun/txgbevf/txgbevf_main.c | 1 + 7 files changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 2dbbb42aa9c01..986bc5acc4721 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1905,6 +1905,15 @@ static void wx_configure_tx_ring(struct wx *wx, memset(ring->tx_buffer_info, 0, sizeof(struct wx_tx_buffer) * ring->count); + if (ring->headwb_mem) { + wr32(wx, WX_PX_TR_HEAD_ADDRL(reg_idx), + ring->headwb_dma & DMA_BIT_MASK(32)); + wr32(wx, WX_PX_TR_HEAD_ADDRH(reg_idx), + upper_32_bits(ring->headwb_dma)); + + txdctl |= WX_PX_TR_CFG_HEAD_WB; + } + /* enable queue */ wr32(wx, WX_PX_TR_CFG(reg_idx), txdctl); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 3adf7048320ae..622213fe67902 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -735,9 +735,22 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector, /* prevent any other reads prior to eop_desc */ smp_rmb(); - /* if DD is not set pending work has not been completed */ - if (!(eop_desc->wb.status & cpu_to_le32(WX_TXD_STAT_DD))) + if (tx_ring->headwb_mem) { + u32 head = *tx_ring->headwb_mem; + + if (head == tx_ring->next_to_clean) + break; + else if (head > tx_ring->next_to_clean && + !(tx_buffer->next_eop >= tx_ring->next_to_clean && + tx_buffer->next_eop < 
head)) + break; + else if (!(tx_buffer->next_eop >= tx_ring->next_to_clean || + tx_buffer->next_eop < head)) + break; + } else if (!(eop_desc->wb.status & cpu_to_le32(WX_TXD_STAT_DD))) { + /* if DD is not set pending work has not been completed */ break; + } /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; @@ -1075,6 +1088,10 @@ static int wx_tx_map(struct wx_ring *tx_ring, /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; + /* set next_eop for amlite tx head wb */ + if (tx_ring->headwb_mem) + first->next_eop = i; + i++; if (i == tx_ring->count) i = 0; @@ -2683,6 +2700,16 @@ void wx_clean_all_tx_rings(struct wx *wx) } EXPORT_SYMBOL(wx_clean_all_tx_rings); +static void wx_free_headwb_resources(struct wx_ring *tx_ring) +{ + if (!tx_ring->headwb_mem) + return; + + dma_free_coherent(tx_ring->dev, sizeof(u32), + tx_ring->headwb_mem, tx_ring->headwb_dma); + tx_ring->headwb_mem = NULL; +} + /** * wx_free_tx_resources - Free Tx Resources per Queue * @tx_ring: Tx descriptor ring for a specific queue @@ -2702,6 +2729,8 @@ static void wx_free_tx_resources(struct wx_ring *tx_ring) dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; + + wx_free_headwb_resources(tx_ring); } /** @@ -2840,6 +2869,24 @@ static int wx_setup_all_rx_resources(struct wx *wx) return err; } +static void wx_setup_headwb_resources(struct wx_ring *tx_ring) +{ + struct wx *wx = netdev_priv(tx_ring->netdev); + + if (!test_bit(WX_FLAG_TXHEAD_WB_ENABLED, wx->flags)) + return; + + if (!tx_ring->q_vector) + return; + + tx_ring->headwb_mem = dma_alloc_coherent(tx_ring->dev, + sizeof(u32), + &tx_ring->headwb_dma, + GFP_KERNEL); + if (!tx_ring->headwb_mem) + dev_info(tx_ring->dev, "Allocate headwb memory failed, disable it\n"); +} + /** * wx_setup_tx_resources - allocate Tx resources (Descriptors) * @tx_ring: tx descriptor ring (for a specific queue) to setup @@ -2880,6 +2927,8 @@ static int 
wx_setup_tx_resources(struct wx_ring *tx_ring) if (!tx_ring->desc) goto err; + wx_setup_headwb_resources(tx_ring); + tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index eb3f32551c14b..8b3c39945c0b0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -434,12 +434,15 @@ enum WX_MSCA_CMD_value { #define WX_PX_TR_WP(_i) (0x03008 + ((_i) * 0x40)) #define WX_PX_TR_RP(_i) (0x0300C + ((_i) * 0x40)) #define WX_PX_TR_CFG(_i) (0x03010 + ((_i) * 0x40)) +#define WX_PX_TR_HEAD_ADDRL(_i) (0x03028 + ((_i) * 0x40)) +#define WX_PX_TR_HEAD_ADDRH(_i) (0x0302C + ((_i) * 0x40)) /* Transmit Config masks */ #define WX_PX_TR_CFG_ENABLE BIT(0) /* Ena specific Tx Queue */ #define WX_PX_TR_CFG_TR_SIZE_SHIFT 1 /* tx desc number per ring */ #define WX_PX_TR_CFG_SWFLSH BIT(26) /* Tx Desc. wr-bk flushing */ #define WX_PX_TR_CFG_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */ #define WX_PX_TR_CFG_THRE_SHIFT 8 +#define WX_PX_TR_CFG_HEAD_WB BIT(27) /* Receive DMA Registers */ #define WX_PX_RR_BAL(_i) (0x01000 + ((_i) * 0x40)) @@ -1011,6 +1014,7 @@ struct wx_tx_buffer { DEFINE_DMA_UNMAP_LEN(len); __be16 protocol; u32 tx_flags; + u32 next_eop; }; struct wx_rx_buffer { @@ -1062,6 +1066,8 @@ struct wx_ring { }; u8 __iomem *tail; dma_addr_t dma; /* phys. 
address of descriptor ring */ + dma_addr_t headwb_dma; + u32 *headwb_mem; unsigned int size; /* length in bytes */ u16 count; /* amount of descriptors */ @@ -1239,6 +1245,7 @@ enum wx_pf_flags { WX_FLAG_NEED_UPDATE_LINK, WX_FLAG_NEED_DO_RESET, WX_FLAG_RX_MERGE_ENABLED, + WX_FLAG_TXHEAD_WB_ENABLED, WX_PF_FLAGS_NBITS /* must be last */ }; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.h b/drivers/net/ethernet/wangxun/libwx/wx_vf.h index ecb1985923936..eb6ca3fe4e97b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.h @@ -92,6 +92,9 @@ #define WX_VXTXDCTL_PTHRESH(f) FIELD_PREP(GENMASK(11, 8), f) #define WX_VXTXDCTL_WTHRESH(f) FIELD_PREP(GENMASK(22, 16), f) #define WX_VXTXDCTL_FLUSH BIT(26) +#define WX_VXTXDCTL_HEAD_WB BIT(27) +#define WX_VXTXD_HEAD_ADDRL(r) (0x3028 + (0x40 * (r))) +#define WX_VXTXD_HEAD_ADDRH(r) (0x302C + (0x40 * (r))) #define WX_PFLINK_STATUS(g) FIELD_GET(BIT(0), g) #define WX_PFLINK_SPEED(g) FIELD_GET(GENMASK(31, 1), g) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index f54107f3c6d7f..aa8be036956c0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -132,6 +132,15 @@ static void wx_configure_tx_ring_vf(struct wx *wx, struct wx_ring *ring) txdctl |= WX_VXTXDCTL_BUFLEN(wx_buf_len(ring->count)); txdctl |= WX_VXTXDCTL_ENABLE; + if (ring->headwb_mem) { + wr32(wx, WX_VXTXD_HEAD_ADDRL(reg_idx), + ring->headwb_dma & DMA_BIT_MASK(32)); + wr32(wx, WX_VXTXD_HEAD_ADDRH(reg_idx), + upper_32_bits(ring->headwb_dma)); + + txdctl |= WX_VXTXDCTL_HEAD_WB; + } + /* reinitialize tx_buffer_info */ memset(ring->tx_buffer_info, 0, sizeof(struct wx_tx_buffer) * ring->count); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 60a04c5a76782..ff690e9a075af 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ 
b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -424,6 +424,7 @@ static int txgbe_sw_init(struct wx *wx) case wx_mac_aml: case wx_mac_aml40: set_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags); + set_bit(WX_FLAG_TXHEAD_WB_ENABLED, wx->flags); set_bit(WX_FLAG_SWFW_RING, wx->flags); wx->swfw_index = 0; break; diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c index 52c1e223bbd78..37e4ec487afdd 100644 --- a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c +++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c @@ -163,6 +163,7 @@ static int txgbevf_sw_init(struct wx *wx) case wx_mac_aml: case wx_mac_aml40: set_bit(WX_FLAG_RX_MERGE_ENABLED, wx->flags); + set_bit(WX_FLAG_TXHEAD_WB_ENABLED, wx->flags); break; default: break; From eaed17770637af6d35d9b5465d91f1256a5eaacf Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Thu, 23 Oct 2025 09:45:38 +0800 Subject: [PATCH 293/867] net: txgbe: support RSC offload Add support for enabling and disabling RSC on txgbe devices.
Signed-off-by: Jiawen Wu Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251023014538.12644-4-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- .../net/ethernet/wangxun/libwx/wx_ethtool.c | 61 ++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 50 ++++++++++- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 90 +++++++++++++++++-- drivers/net/ethernet/wangxun/libwx/wx_sriov.c | 4 + drivers/net/ethernet/wangxun/libwx/wx_type.h | 33 +++++-- .../net/ethernet/wangxun/txgbe/txgbe_main.c | 3 + 6 files changed, 224 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 06f401bd975c7..9aa3964187e1d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -51,6 +51,11 @@ static const struct wx_stats wx_gstrings_fdir_stats[] = { WX_STAT("fdir_miss", stats.fdirmiss), }; +static const struct wx_stats wx_gstrings_rsc_stats[] = { + WX_STAT("rsc_aggregated", rsc_count), + WX_STAT("rsc_flushed", rsc_flush), +}; + /* drivers allocates num_tx_queues and num_rx_queues symmetrically so * we set the num_rx_queues to evaluate to num_tx_queues. This is * used because we do not have a good way to get the max number of @@ -64,16 +69,21 @@ static const struct wx_stats wx_gstrings_fdir_stats[] = { (sizeof(struct wx_queue_stats) / sizeof(u64))) #define WX_GLOBAL_STATS_LEN ARRAY_SIZE(wx_gstrings_stats) #define WX_FDIR_STATS_LEN ARRAY_SIZE(wx_gstrings_fdir_stats) +#define WX_RSC_STATS_LEN ARRAY_SIZE(wx_gstrings_rsc_stats) #define WX_STATS_LEN (WX_GLOBAL_STATS_LEN + WX_QUEUE_STATS_LEN) int wx_get_sset_count(struct net_device *netdev, int sset) { struct wx *wx = netdev_priv(netdev); + int len = WX_STATS_LEN; switch (sset) { case ETH_SS_STATS: - return (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) ? 
- WX_STATS_LEN + WX_FDIR_STATS_LEN : WX_STATS_LEN; + if (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) + len += WX_FDIR_STATS_LEN; + if (test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) + len += WX_RSC_STATS_LEN; + return len; default: return -EOPNOTSUPP; } @@ -94,6 +104,10 @@ void wx_get_strings(struct net_device *netdev, u32 stringset, u8 *data) for (i = 0; i < WX_FDIR_STATS_LEN; i++) ethtool_puts(&p, wx_gstrings_fdir_stats[i].stat_string); } + if (test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) { + for (i = 0; i < WX_RSC_STATS_LEN; i++) + ethtool_puts(&p, wx_gstrings_rsc_stats[i].stat_string); + } for (i = 0; i < netdev->num_tx_queues; i++) { ethtool_sprintf(&p, "tx_queue_%u_packets", i); ethtool_sprintf(&p, "tx_queue_%u_bytes", i); @@ -131,6 +145,13 @@ void wx_get_ethtool_stats(struct net_device *netdev, } } + if (test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) { + for (k = 0; k < WX_RSC_STATS_LEN; k++) { + p = (char *)wx + wx_gstrings_rsc_stats[k].stat_offset; + data[i++] = *(u64 *)p; + } + } + for (j = 0; j < netdev->num_tx_queues; j++) { ring = wx->tx_ring[j]; if (!ring) { @@ -322,6 +343,40 @@ int wx_get_coalesce(struct net_device *netdev, } EXPORT_SYMBOL(wx_get_coalesce); +static void wx_update_rsc(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + bool need_reset = false; + + /* nothing to do if LRO or RSC are not enabled */ + if (!test_bit(WX_FLAG_RSC_CAPABLE, wx->flags) || + !(netdev->features & NETIF_F_LRO)) + return; + + /* check the feature flag value and enable RSC if necessary */ + if (wx->rx_itr_setting == 1 || + wx->rx_itr_setting > WX_MIN_RSC_ITR) { + if (!test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) { + set_bit(WX_FLAG_RSC_ENABLED, wx->flags); + dev_info(&wx->pdev->dev, + "rx-usecs value high enough to re-enable RSC\n"); + + need_reset = true; + } + /* if interrupt rate is too high then disable RSC */ + } else if (test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) { + clear_bit(WX_FLAG_RSC_ENABLED, wx->flags); + dev_info(&wx->pdev->dev, + "rx-usecs set too low, 
disabling RSC\n"); + + need_reset = true; + } + + /* reset the device to apply the new RSC setting */ + if (need_reset && wx->do_reset) + wx->do_reset(netdev); +} + int wx_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, @@ -414,6 +469,8 @@ int wx_set_coalesce(struct net_device *netdev, wx_write_eitr(q_vector); } + wx_update_rsc(wx); + return 0; } EXPORT_SYMBOL(wx_set_coalesce); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 986bc5acc4721..8141644597077 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1779,7 +1779,9 @@ EXPORT_SYMBOL(wx_set_rx_mode); static void wx_set_rx_buffer_len(struct wx *wx) { struct net_device *netdev = wx->netdev; + struct wx_ring *rx_ring; u32 mhadd, max_frame; + int i; max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; /* adjust max frame to be at least the size of a standard frame */ @@ -1789,6 +1791,19 @@ static void wx_set_rx_buffer_len(struct wx *wx) mhadd = rd32(wx, WX_PSR_MAX_SZ); if (max_frame != mhadd) wr32(wx, WX_PSR_MAX_SZ, max_frame); + + /* + * Setup the HW Rx Head and Tail Descriptor Pointers and + * the Base and Length of the Rx Descriptor Ring + */ + for (i = 0; i < wx->num_rx_queues; i++) { + rx_ring = wx->rx_ring[i]; + rx_ring->rx_buf_len = WX_RXBUFFER_2K; +#if (PAGE_SIZE < 8192) + if (test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) + rx_ring->rx_buf_len = WX_RXBUFFER_3K; +#endif + } } /** @@ -1865,11 +1880,27 @@ static void wx_configure_srrctl(struct wx *wx, srrctl |= WX_RXBUFFER_256 << WX_PX_RR_CFG_BHDRSIZE_SHIFT; /* configure the packet buffer length */ - srrctl |= WX_RX_BUFSZ >> WX_PX_RR_CFG_BSIZEPKT_SHIFT; + srrctl |= rx_ring->rx_buf_len >> WX_PX_RR_CFG_BSIZEPKT_SHIFT; wr32(wx, WX_PX_RR_CFG(reg_idx), srrctl); } +static void wx_configure_rscctl(struct wx *wx, + struct wx_ring *ring) +{ + u8 reg_idx = ring->reg_idx; + u32 rscctrl; + 
+ if (!test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) + return; + + rscctrl = rd32(wx, WX_PX_RR_CFG(reg_idx)); + rscctrl |= WX_PX_RR_CFG_RSC; + rscctrl |= WX_PX_RR_CFG_MAX_RSCBUF_16; + + wr32(wx, WX_PX_RR_CFG(reg_idx), rscctrl); +} + static void wx_configure_tx_ring(struct wx *wx, struct wx_ring *ring) { @@ -1956,6 +1987,7 @@ static void wx_configure_rx_ring(struct wx *wx, ring->tail = wx->hw_addr + WX_PX_RR_WP(reg_idx); wx_configure_srrctl(wx, ring); + wx_configure_rscctl(wx, ring); /* initialize rx_buffer_info */ memset(ring->rx_buffer_info, 0, @@ -2194,7 +2226,9 @@ void wx_configure_rx(struct wx *wx) /* RSC Setup */ psrctl = rd32(wx, WX_PSR_CTL); psrctl |= WX_PSR_CTL_RSC_ACK; /* Disable RSC for ACK packets */ - psrctl |= WX_PSR_CTL_RSC_DIS; + psrctl &= ~WX_PSR_CTL_RSC_DIS; + if (!test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) + psrctl |= WX_PSR_CTL_RSC_DIS; wr32(wx, WX_PSR_CTL, psrctl); } @@ -2824,6 +2858,18 @@ void wx_update_stats(struct wx *wx) wx->hw_csum_rx_error = hw_csum_rx_error; wx->hw_csum_rx_good = hw_csum_rx_good; + if (test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) { + u64 rsc_count = 0; + u64 rsc_flush = 0; + + for (i = 0; i < wx->num_rx_queues; i++) { + rsc_count += wx->rx_ring[i]->rx_stats.rsc_count; + rsc_flush += wx->rx_ring[i]->rx_stats.rsc_flush; + } + wx->rsc_count = rsc_count; + wx->rsc_flush = rsc_flush; + } + for (i = 0; i < wx->num_tx_queues; i++) { struct wx_ring *tx_ring = wx->tx_ring[i]; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 622213fe67902..32cadafa4b3b8 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -235,7 +235,7 @@ static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring, { unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) - unsigned int truesize = WX_RX_BUFSZ; + unsigned int truesize = wx_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); #endif @@ 
-341,7 +341,7 @@ void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count) /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - WX_RX_BUFSZ, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); rx_desc->read.pkt_addr = @@ -404,6 +404,7 @@ static bool wx_is_non_eop(struct wx_ring *rx_ring, union wx_rx_desc *rx_desc, struct sk_buff *skb) { + struct wx *wx = rx_ring->q_vector->wx; u32 ntc = rx_ring->next_to_clean + 1; /* fetch, update, and store next to clean */ @@ -412,6 +413,24 @@ static bool wx_is_non_eop(struct wx_ring *rx_ring, prefetch(WX_RX_DESC(rx_ring, ntc)); + /* update RSC append count if present */ + if (test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) { + __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data & + cpu_to_le32(WX_RXD_RSCCNT_MASK); + + if (unlikely(rsc_enabled)) { + u32 rsc_cnt = le32_to_cpu(rsc_enabled); + + rsc_cnt >>= WX_RXD_RSCCNT_SHIFT; + WX_CB(skb)->append_cnt += rsc_cnt - 1; + + /* update ntc based on RSC value */ + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= WX_RXD_NEXTP_MASK; + ntc >>= WX_RXD_NEXTP_SHIFT; + } + } + /* if we are the last buffer then there is nothing else to do */ if (likely(wx_test_staterr(rx_desc, WX_RXD_STAT_EOP))) return false; @@ -582,6 +601,33 @@ static void wx_rx_vlan(struct wx_ring *ring, union wx_rx_desc *rx_desc, } } +static void wx_set_rsc_gso_size(struct wx_ring *ring, + struct sk_buff *skb) +{ + u16 hdr_len = skb_headlen(skb); + + /* set gso_size to avoid messing up TCP MSS */ + skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), + WX_CB(skb)->append_cnt); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; +} + +static void wx_update_rsc_stats(struct wx_ring *rx_ring, + struct sk_buff *skb) +{ + /* if append_cnt is 0 then frame is not RSC */ + if (!WX_CB(skb)->append_cnt) + return; + + rx_ring->rx_stats.rsc_count += WX_CB(skb)->append_cnt; + rx_ring->rx_stats.rsc_flush++; + + wx_set_rsc_gso_size(rx_ring, skb); + + /* gso_size is 
computed using append_cnt so always clear it last */ + WX_CB(skb)->append_cnt = 0; +} + /** * wx_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on @@ -598,6 +644,9 @@ static void wx_process_skb_fields(struct wx_ring *rx_ring, { struct wx *wx = netdev_priv(rx_ring->netdev); + if (test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) + wx_update_rsc_stats(rx_ring, skb); + wx_rx_hash(rx_ring, rx_desc, skb); wx_rx_checksum(rx_ring, rx_desc, skb); @@ -2549,7 +2598,7 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - WX_RX_BUFSZ, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* free resources associated with mapping */ @@ -2760,13 +2809,14 @@ static int wx_alloc_page_pool(struct wx_ring *rx_ring) struct page_pool_params pp_params = { .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, - .order = 0, - .pool_size = rx_ring->count, + .order = wx_rx_pg_order(rx_ring), + .pool_size = rx_ring->count * rx_ring->rx_buf_len / + wx_rx_pg_size(rx_ring), .nid = dev_to_node(rx_ring->dev), .dev = rx_ring->dev, .dma_dir = DMA_FROM_DEVICE, .offset = 0, - .max_len = PAGE_SIZE, + .max_len = wx_rx_pg_size(rx_ring), }; rx_ring->page_pool = page_pool_create(&pp_params); @@ -3075,8 +3125,25 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) else if (changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER)) wx_set_rx_mode(netdev); + if (test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) { + if (!(features & NETIF_F_LRO)) { + if (test_bit(WX_FLAG_RSC_ENABLED, wx->flags)) + need_reset = true; + clear_bit(WX_FLAG_RSC_ENABLED, wx->flags); + } else if (!(test_bit(WX_FLAG_RSC_ENABLED, wx->flags))) { + if (wx->rx_itr_setting == 1 || + wx->rx_itr_setting > WX_MIN_RSC_ITR) { + set_bit(WX_FLAG_RSC_ENABLED, wx->flags); + need_reset = true; + } else if (changed & NETIF_F_LRO) { + dev_info(&wx->pdev->dev, + "rx-usecs set too low, 
disable RSC\n"); + } + } + } + if (!(test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags))) - return 0; + goto out; /* Check if Flow Director n-tuple support was enabled or disabled. If * the state changed, we need to reset. @@ -3102,6 +3169,7 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) break; } +out: if (need_reset && wx->do_reset) wx->do_reset(netdev); @@ -3151,6 +3219,14 @@ netdev_features_t wx_fix_features(struct net_device *netdev, } } + /* If Rx checksum is disabled, then RSC/LRO should also be disabled */ + if (!(features & NETIF_F_RXCSUM)) + features &= ~NETIF_F_LRO; + + /* Turn off LRO if not RSC capable */ + if (!test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) + features &= ~NETIF_F_LRO; + return features; } EXPORT_SYMBOL(wx_fix_features); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index c6d158cd70da3..493da5fffdb6e 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -122,6 +122,10 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) WX_CFG_PORT_CTL_NUM_VT_MASK, value); + /* Disable RSC when in SR-IOV mode */ + clear_bit(WX_FLAG_RSC_CAPABLE, wx->flags); + clear_bit(WX_FLAG_RSC_ENABLED, wx->flags); + return ret; } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 8b3c39945c0b0..d0cbcded1dd44 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -424,6 +424,7 @@ enum WX_MSCA_CMD_value { #define WX_7K_ITR 595 #define WX_12K_ITR 336 #define WX_20K_ITR 200 +#define WX_MIN_RSC_ITR 24 #define WX_SP_MAX_EITR 0x00000FF8U #define WX_AML_MAX_EITR 0x00000FFFU #define WX_EM_MAX_EITR 0x00007FFCU @@ -454,7 +455,9 @@ enum WX_MSCA_CMD_value { /* PX_RR_CFG bit definitions */ #define WX_PX_RR_CFG_VLAN BIT(31) #define WX_PX_RR_CFG_DROP_EN BIT(30) +#define WX_PX_RR_CFG_RSC BIT(29) #define WX_PX_RR_CFG_SPLIT_MODE 
BIT(26) +#define WX_PX_RR_CFG_MAX_RSCBUF_16 FIELD_PREP(GENMASK(24, 23), 3) #define WX_PX_RR_CFG_DESC_MERGE BIT(19) #define WX_PX_RR_CFG_RR_THER_SHIFT 16 #define WX_PX_RR_CFG_RR_HDR_SZ GENMASK(15, 12) @@ -551,14 +554,9 @@ enum WX_MSCA_CMD_value { /* Supported Rx Buffer Sizes */ #define WX_RXBUFFER_256 256 /* Used for skb receive header */ #define WX_RXBUFFER_2K 2048 +#define WX_RXBUFFER_3K 3072 #define WX_MAX_RXBUFFER 16384 /* largest size for single descriptor */ -#if MAX_SKB_FRAGS < 8 -#define WX_RX_BUFSZ ALIGN(WX_MAX_RXBUFFER / MAX_SKB_FRAGS, 1024) -#else -#define WX_RX_BUFSZ WX_RXBUFFER_2K -#endif - #define WX_RX_BUFFER_WRITE 16 /* Must be power of 2 */ #define WX_MAX_DATA_PER_TXD BIT(14) @@ -652,6 +650,12 @@ enum wx_l2_ptypes { #define WX_RXD_PKTTYPE(_rxd) \ ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF) + +#define WX_RXD_RSCCNT_MASK GENMASK(20, 17) +#define WX_RXD_RSCCNT_SHIFT 17 +#define WX_RXD_NEXTP_MASK GENMASK(19, 4) +#define WX_RXD_NEXTP_SHIFT 4 + /*********************** Transmit Descriptor Config Masks ****************/ #define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */ #define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */ @@ -1039,6 +1043,8 @@ struct wx_rx_queue_stats { u64 csum_good_cnt; u64 csum_err; u64 alloc_rx_buff_failed; + u64 rsc_count; + u64 rsc_flush; }; /* iterator for handling rings in ring container */ @@ -1081,6 +1087,7 @@ struct wx_ring { */ u16 next_to_use; u16 next_to_clean; + u16 rx_buf_len; union { u16 next_to_alloc; struct { @@ -1237,6 +1244,7 @@ enum wx_pf_flags { WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, WX_FLAG_RSC_CAPABLE, + WX_FLAG_RSC_ENABLED, WX_FLAG_RX_HWTSTAMP_ENABLED, WX_FLAG_RX_HWTSTAMP_IN_REGISTER, WX_FLAG_PTP_PPS_ENABLED, @@ -1352,6 +1360,8 @@ struct wx { u64 hw_csum_rx_good; u64 hw_csum_rx_error; u64 alloc_rx_buff_failed; + u64 rsc_count; + u64 rsc_flush; unsigned int num_vfs; struct vf_data_storage *vfinfo; struct vf_macvlans vf_mvs; @@ -1483,4 +1493,15 @@ static inline int wx_set_state_reset(struct wx 
*wx) return 0; } +static inline unsigned int wx_rx_pg_order(struct wx_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring->rx_buf_len == WX_RXBUFFER_3K) + return 1; +#endif + return 0; +} + +#define wx_rx_pg_size(_ring) (PAGE_SIZE << wx_rx_pg_order(_ring)) + #endif /* _WX_TYPE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index ff690e9a075af..daa761e48f9d1 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -398,6 +398,7 @@ static int txgbe_sw_init(struct wx *wx) wx->configure_fdir = txgbe_configure_fdir; set_bit(WX_FLAG_RSC_CAPABLE, wx->flags); + set_bit(WX_FLAG_RSC_ENABLED, wx->flags); set_bit(WX_FLAG_MULTI_64_FUNC, wx->flags); /* enable itr by default in dynamic mode */ @@ -803,6 +804,8 @@ static int txgbe_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_HIGHDMA; netdev->hw_features |= NETIF_F_GRO; netdev->features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_LRO; + netdev->features |= NETIF_F_LRO; netdev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; netdev->priv_flags |= IFF_UNICAST_FLT; From 211de28b1caf51581ba5e0978e83213db4f488c6 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 23 Oct 2025 12:16:56 +0300 Subject: [PATCH 294/867] net/mlx5: Use common mlx5_same_hw_devs function Refactor duplicate hardware device comparison code to use the common mlx5_same_hw_devs() function instead of reimplementing system GUID comparison logic in multiple places. This cleanup eliminates code duplication in: - Bridge representor device comparison. - TC hardware device comparison. Using the centralized function improves maintainability and ensures consistent behavior across the driver. 
Signed-off-by: Mark Bloch Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761211020-925651-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 6 +----- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 9d1c677814e06..87a2ad69526d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -30,15 +30,11 @@ static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswi { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev, *esw_mdev; - u64 system_guid, esw_system_guid; mdev = priv->mdev; esw_mdev = esw->dev; - system_guid = mlx5_query_nic_system_image_guid(mdev); - esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev); - - return system_guid == esw_system_guid; + return mlx5_same_hw_devs(mdev, esw_mdev); } static struct net_device * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 00c2763e57ca1..54ccfb4e6c4e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3614,15 +3614,11 @@ static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *fmdev, *pmdev; - u64 fsystem_guid, psystem_guid; fmdev = priv->mdev; pmdev = peer_priv->mdev; - fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); - psystem_guid = mlx5_query_nic_system_image_guid(pmdev); - - return (fsystem_guid == psystem_guid); + return mlx5_same_hw_devs(fmdev, pmdev); } static int From 7718f2a8b87af7363d60819ac0ac0da8b2f8ff00 Mon Sep 17 
00:00:00 2001 From: Mark Bloch Date: Thu, 23 Oct 2025 12:16:57 +0300 Subject: [PATCH 295/867] net/mlx5: Add software system image GUID infrastructure Replace direct hardware system image GUID usage with a new software system image GUID function that supports variable-length identifiers. Key changes: - Add mlx5_query_nic_sw_system_image_guid() function with length parameter. - Update all callsites to use the new function and buffer/length approach. - Modify mapping contexts to use byte arrays instead of u64 keys. - Update devcom matching to support variable-length keys. - Change mlx5_same_hw_devs() to use buffer comparison instead of u64. This refactoring prepares the infrastructure for balance ID support, which requires extending the system image GUID with additional data. The change maintains backward compatibility while enabling future enhancements. Signed-off-by: Mark Bloch Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761211020-925651-3-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 12 ++++++--- .../ethernet/mellanox/mlx5/core/en/devlink.c | 7 ++--- .../ethernet/mellanox/mlx5/core/en/mapping.c | 13 +++++++--- .../ethernet/mellanox/mlx5/core/en/mapping.h | 3 ++- .../mellanox/mlx5/core/en/tc/int_port.c | 8 +++--- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 11 +++++--- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 26 ++++++++++++------- .../mellanox/mlx5/core/esw/devlink_port.c | 6 +---- .../mellanox/mlx5/core/eswitch_offloads.c | 8 +++--- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++- .../ethernet/mellanox/mlx5/core/lib/devcom.h | 2 ++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/vport.c | 15 +++++++++++ include/linux/mlx5/driver.h | 3 +++ 14 files changed, 80 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 
891bbbbfbbf1a..64c04f52990fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -564,10 +564,14 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) { - u64 fsystem_guid, psystem_guid; + u8 fsystem_guid[MLX5_SW_IMAGE_GUID_MAX_BYTES]; + u8 psystem_guid[MLX5_SW_IMAGE_GUID_MAX_BYTES]; + u8 flen; + u8 plen; - fsystem_guid = mlx5_query_nic_system_image_guid(dev); - psystem_guid = mlx5_query_nic_system_image_guid(peer_dev); + mlx5_query_nic_sw_system_image_guid(dev, fsystem_guid, &flen); + mlx5_query_nic_sw_system_image_guid(peer_dev, psystem_guid, &plen); - return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid); + return plen && flen && flen == plen && + !memcmp(fsystem_guid, psystem_guid, flen); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 0b1ac6e5c8900..8818f65d1fbc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -40,11 +40,8 @@ void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev) static void mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) { - u64 parent_id; - - parent_id = mlx5_query_nic_system_image_guid(dev); - ppid->id_len = sizeof(parent_id); - memcpy(ppid->id, &parent_id, sizeof(parent_id)); + BUILD_BUG_ON(MLX5_SW_IMAGE_GUID_MAX_BYTES > MAX_PHYS_ITEM_ID_LEN); + mlx5_query_nic_sw_system_image_guid(dev, ppid->id, &ppid->id_len); } int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c index 4e72ca8070e24..1de18c7e96ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -6,6 +6,7 @@ 
#include #include #include +#include #include "mapping.h" @@ -24,7 +25,8 @@ struct mapping_ctx { struct delayed_work dwork; struct list_head pending_list; spinlock_t pending_list_lock; /* Guards pending list */ - u64 id; + u8 id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; + u8 id_len; u8 type; struct list_head list; refcount_t refcount; @@ -220,13 +222,15 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal) } struct mapping_ctx * -mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal) +mapping_create_for_id(u8 *id, u8 id_len, u8 type, size_t data_size, u32 max_id, + bool delayed_removal) { struct mapping_ctx *ctx; mutex_lock(&shared_ctx_lock); list_for_each_entry(ctx, &shared_ctx_list, list) { - if (ctx->id == id && ctx->type == type) { + if (ctx->type == type && ctx->id_len == id_len && + !memcmp(id, ctx->id, id_len)) { if (refcount_inc_not_zero(&ctx->refcount)) goto unlock; break; @@ -237,7 +241,8 @@ mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delaye if (IS_ERR(ctx)) goto unlock; - ctx->id = id; + memcpy(ctx->id, id, id_len); + ctx->id_len = id_len; ctx->type = type; list_add(&ctx->list, &shared_ctx_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h index 4e2119f0f4c11..e86a103d58b9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -27,6 +27,7 @@ void mapping_destroy(struct mapping_ctx *ctx); /* adds mapping with an id or get an existing mapping with the same id */ struct mapping_ctx * -mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal); +mapping_create_for_id(u8 *id, u8 id_len, u8 type, size_t data_size, u32 max_id, + bool delayed_removal); #endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c 
index 896f718483c30..991f47050643d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c @@ -307,7 +307,8 @@ mlx5e_tc_int_port_init(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_int_port_priv *int_port_priv; - u64 mapping_id; + u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; + u8 id_len; if (!mlx5e_tc_int_port_supported(esw)) return NULL; @@ -316,9 +317,10 @@ mlx5e_tc_int_port_init(struct mlx5e_priv *priv) if (!int_port_priv) return NULL; - mapping_id = mlx5_query_nic_system_image_guid(priv->mdev); + mlx5_query_nic_sw_system_image_guid(priv->mdev, mapping_id, &id_len); - int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT, + int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, id_len, + MAPPING_TYPE_INT_PORT, sizeof(u32) * 2, (1 << ESW_VPORT_BITS) - 1, true); if (IS_ERR(int_port_priv->metadata_mapping)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 870d12364f99e..fc0e57403d254 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2287,9 +2287,10 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, enum mlx5_flow_namespace_type ns_type, struct mlx5e_post_act *post_act) { + u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; struct mlx5_tc_ct_priv *ct_priv; struct mlx5_core_dev *dev; - u64 mapping_id; + u8 id_len; int err; dev = priv->mdev; @@ -2301,16 +2302,18 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!ct_priv) goto err_alloc; - mapping_id = mlx5_query_nic_system_image_guid(dev); + mlx5_query_nic_sw_system_image_guid(dev, mapping_id, &id_len); - ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, + ct_priv->zone_mapping = mapping_create_for_id(mapping_id, id_len, + 
MAPPING_TYPE_ZONE, sizeof(u16), 0, true); if (IS_ERR(ct_priv->zone_mapping)) { err = PTR_ERR(ct_priv->zone_mapping); goto err_mapping_zone; } - ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, + ct_priv->labels_mapping = mapping_create_for_id(mapping_id, id_len, + MAPPING_TYPE_LABELS, sizeof(u32) * 4, 0, true); if (IS_ERR(ct_priv->labels_mapping)) { err = PTR_ERR(ct_priv->labels_mapping); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 54ccfb4e6c4e2..a8773b2342c2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5233,10 +5233,11 @@ static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; struct mlx5_core_dev *dev = priv->mdev; struct mapping_ctx *chains_mapping; struct mlx5_chains_attr attr = {}; - u64 mapping_id; + u8 id_len; int err; mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); @@ -5252,11 +5253,13 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); - mapping_id = mlx5_query_nic_system_image_guid(dev); + mlx5_query_nic_sw_system_image_guid(dev, mapping_id, &id_len); - chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + chains_mapping = mapping_create_for_id(mapping_id, id_len, + MAPPING_TYPE_CHAIN, sizeof(struct mlx5_mapped_obj), - MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + MLX5E_TC_TABLE_CHAIN_TAG_MASK, + true); if (IS_ERR(chains_mapping)) { err = PTR_ERR(chains_mapping); @@ -5387,14 +5390,15 @@ void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); + u8 
mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; struct mlx5_devcom_match_attr attr = {}; struct netdev_phys_item_id ppid; struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - u64 mapping_id; int err = 0; + u8 id_len; rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); priv = netdev_priv(rpriv->netdev); @@ -5412,9 +5416,9 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); - mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + mlx5_query_nic_sw_system_image_guid(esw->dev, mapping_id, &id_len); - mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, + mapping = mapping_create_for_id(mapping_id, id_len, MAPPING_TYPE_TUNNEL, sizeof(struct tunnel_match_key), TUNNEL_INFO_BITS_MASK, true); @@ -5427,8 +5431,10 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) /* Two last values are reserved for stack devices slow path table mark * and bridge ingress push mark. 
*/ - mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, - sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true); + mapping = mapping_create_for_id(mapping_id, id_len, + MAPPING_TYPE_TUNNEL_ENC_OPTS, + sz_enc_opts, ENC_OPTS_BITS_MASK - 2, + true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; @@ -5449,7 +5455,7 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) err = netif_get_port_parent_id(priv->netdev, &ppid, false); if (!err) { - memcpy(&attr.key.val, &ppid.id, sizeof(attr.key.val)); + memcpy(&attr.key.buf, &ppid.id, ppid.id_len); attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS; attr.net = mlx5_core_net(esw->dev); mlx5_esw_offloads_devcom_init(esw, &attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index cf88a106d80d7..89a58dee50b3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -7,11 +7,7 @@ static void mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) { - u64 parent_id; - - parent_id = mlx5_query_nic_system_image_guid(dev); - ppid->id_len = sizeof(parent_id); - memcpy(ppid->id, &parent_id, sizeof(parent_id)); + mlx5_query_nic_sw_system_image_guid(dev, ppid->id, &ppid->id_len); } static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 34749814f19b8..9735a75732cf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3557,10 +3557,11 @@ bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 cont int esw_offloads_enable(struct mlx5_eswitch *esw) { + u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; struct mapping_ctx 
*reg_c0_obj_pool; struct mlx5_vport *vport; unsigned long i; - u64 mapping_id; + u8 id_len; int err; mutex_init(&esw->offloads.termtbl_mutex); @@ -3582,9 +3583,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; - mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + mlx5_query_nic_sw_system_image_guid(esw->dev, mapping_id, &id_len); - reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + reg_c0_obj_pool = mapping_create_for_id(mapping_id, id_len, + MAPPING_TYPE_CHAIN, sizeof(struct mlx5_mapped_obj), ESW_REG_C0_USER_DATA_METADATA_MASK, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 3db0387bf6dcb..1ac933cd8f02b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1418,10 +1418,12 @@ static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) { struct mlx5_devcom_match_attr attr = { - .key.val = mlx5_query_nic_system_image_guid(dev), .flags = MLX5_DEVCOM_MATCH_FLAGS_NS, .net = mlx5_core_net(dev), }; + u8 len __always_unused; + + mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len); /* This component is use to sync adding core_dev to lag_dev and to sync * changes of mlx5_adev_devices between LAG layer and other layers. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 609c85f479173..91e5ae529d5c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -10,8 +10,10 @@ enum mlx5_devom_match_flags { MLX5_DEVCOM_MATCH_FLAGS_NS = BIT(0), }; +#define MLX5_DEVCOM_MATCH_KEY_MAX 32 union mlx5_devcom_match_key { u64 val; + u8 buf[MLX5_DEVCOM_MATCH_KEY_MAX]; }; struct mlx5_devcom_match_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 082259b56816c..acef7d0ffa097 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -444,6 +444,8 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev); void mlx5_uninit_one_light(struct mlx5_core_dev *dev); void mlx5_unload_one_light(struct mlx5_core_dev *dev); +void mlx5_query_nic_sw_system_image_guid(struct mlx5_core_dev *mdev, u8 *buf, + u8 *len); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 2ed2e530b07d0..4224e27508654 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1190,6 +1190,21 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); +void mlx5_query_nic_sw_system_image_guid(struct mlx5_core_dev *mdev, u8 *buf, + u8 *len) +{ + u64 fw_system_image_guid; + + *len = 0; + + fw_system_image_guid = mlx5_query_nic_system_image_guid(mdev); + if (!fw_system_image_guid) + return; + + memcpy(buf, &fw_system_image_guid, sizeof(fw_system_image_guid)); + *len += sizeof(fw_system_image_guid); +} + 
static bool mlx5_vport_use_vhca_id_as_func_id(struct mlx5_core_dev *dev, u16 vport_num, u16 *vhca_id) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5405ca1038f9e..dcf262aa9ea6b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1379,4 +1379,7 @@ static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) { return devlink_net(priv_to_devlink(dev)); } + +#define MLX5_SW_IMAGE_GUID_MAX_BYTES 8 + #endif /* MLX5_DRIVER_H */ From cd36818c34ac5ff7f6a50ce88822c7bbb5ac9e0d Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 23 Oct 2025 12:16:58 +0300 Subject: [PATCH 296/867] net/mlx5: Refactor PTP clock devcom pairing Refactor PTP clock device component pairing to use the clock identity buffer instead of casting it to a u64 key. This change leverages the new software system image GUID infrastructure. Changes include: - Pass identity buffer to mlx5_shared_clock_register(). - Use memcpy for identity buffer in devcom matching attributes. - Remove intermediate u64 key conversion. - Add BUILD_BUG_ON to ensure identity size fits in match key. 
Signed-off-by: Mark Bloch Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761211020-925651-4-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/lib/clock.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 29e7fa09c32c8..0ba0ef8bae421 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -1432,15 +1432,17 @@ static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared) return 0; } -static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) +static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, + u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE]) { struct mlx5_core_dev *peer_dev, *next = NULL; - struct mlx5_devcom_match_attr attr = { - .key.val = key, - }; + struct mlx5_devcom_match_attr attr = {}; struct mlx5_devcom_comp_dev *compd; struct mlx5_devcom_comp_dev *pos; + BUILD_BUG_ON(MLX5_RT_CLOCK_IDENTITY_SIZE > MLX5_DEVCOM_MATCH_KEY_MAX); + memcpy(attr.key.buf, identity, MLX5_RT_CLOCK_IDENTITY_SIZE); + compd = mlx5_devcom_register_component(mdev->priv.devc, MLX5_DEVCOM_SHARED_CLOCK, &attr, NULL, mdev); @@ -1594,7 +1596,6 @@ int mlx5_init_clock(struct mlx5_core_dev *mdev) { u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE]; struct mlx5_clock_dev_state *clock_state; - u64 key; int err; if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { @@ -1610,12 +1611,10 @@ int mlx5_init_clock(struct mlx5_core_dev *mdev) mdev->clock_state = clock_state; if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) { - if (mlx5_clock_identity_get(mdev, identity)) { + if (mlx5_clock_identity_get(mdev, identity)) mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n"); - } else { - memcpy(&key, &identity, sizeof(key)); - 
mlx5_shared_clock_register(mdev, key); - } + else + mlx5_shared_clock_register(mdev, identity); } if (!mdev->clock) { From 075e85a1261e4653c2068e68a8c91da6c7bc4e60 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 23 Oct 2025 12:16:59 +0300 Subject: [PATCH 297/867] net/mlx5: Refactor HCA cap 2 setting Refactor HCA capability 2 setting logic to be more structured and conditional. Move the sw_vhca_id_valid setting inside proper conditional checks and prepare the function for additional capability settings. The refactoring: - Always copy current capabilities to set_hca_cap buffer. - Apply sw_vhca_id_valid setting only when conditions are met. - Improve code readability and maintainability. This cleanup prepares the handle_hca_cap_2() function for the upcoming balance ID capability setting. Signed-off-by: Mark Bloch Reviewed-by: Moshe Shemesh Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761211020-925651-5-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 70c156591b0ba..563267acf386f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -553,6 +553,7 @@ EXPORT_SYMBOL(mlx5_is_roce_on); static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) { + bool do_set = false; void *set_hca_cap; int err; @@ -563,17 +564,22 @@ static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) if (err) return err; - if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || - !(dev->priv.sw_vhca_id > 0)) - return 0; - set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); - MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 
1); - return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); + if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && + dev->priv.sw_vhca_id > 0) { + MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); + do_set = true; + } + + /* some FW versions that support querying MLX5_CAP_GENERAL_2 + * capabilities but don't support setting them. + * Skip unnecessary update to hca_cap_2 when no changes were introduced + */ + return do_set ? set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2) : 0; } static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) From 20d78ead947783b039b02ca4b8c551b4d1894759 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 23 Oct 2025 12:17:00 +0300 Subject: [PATCH 298/867] net/mlx5: Add balance ID support for LAG multiplane groups Implement balance ID support for multiplane LAG configurations. This feature enables per-multiplane group load balancing by extending the software system image GUID with a balance ID component. Key implementations: - Enable lag_per_mp_group capability when supported by hardware. - Append load_balance_id to software system image GUID when conditions are met. - Increase MLX5_SW_IMAGE_GUID_MAX_BYTES from 8 to 9 to accommodate the extra byte. The balance ID is appended to the system image GUID only when both load_balance_id and lag_per_mp_group capabilities are available, ensuring backward compatibility while enabling enhanced LAG functionality. This enhancement allows for more granular load balancing control in complex multi-plane LAG deployments, improving network performance and flexibility. 
Signed-off-by: Mark Bloch Reviewed-by: Moshe Shemesh Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761211020-925651-6-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 4 ++++ include/linux/mlx5/driver.h | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 563267acf386f..c904696cbc3a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -575,6 +575,11 @@ static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) do_set = true; } + if (MLX5_CAP_GEN_2_MAX(dev, lag_per_mp_group)) { + MLX5_SET(cmd_hca_cap_2, set_hca_cap, lag_per_mp_group, 1); + do_set = true; + } + /* some FW versions that support querying MLX5_CAP_GENERAL_2 * capabilities but don't support setting them. 
* Skip unnecessary update to hca_cap_2 when no changes were introduced diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 4224e27508654..992873536c1b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1203,6 +1203,10 @@ void mlx5_query_nic_sw_system_image_guid(struct mlx5_core_dev *mdev, u8 *buf, memcpy(buf, &fw_system_image_guid, sizeof(fw_system_image_guid)); *len += sizeof(fw_system_image_guid); + + if (MLX5_CAP_GEN_2(mdev, load_balance_id) && + MLX5_CAP_GEN_2(mdev, lag_per_mp_group)) + buf[(*len)++] = MLX5_CAP_GEN_2(mdev, load_balance_id); } static bool mlx5_vport_use_vhca_id_as_func_id(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcf262aa9ea6b..046396269ccf6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1380,6 +1380,6 @@ static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) return devlink_net(priv_to_devlink(dev)); } -#define MLX5_SW_IMAGE_GUID_MAX_BYTES 8 +#define MLX5_SW_IMAGE_GUID_MAX_BYTES 9 #endif /* MLX5_DRIVER_H */ From 182663bbff784e88cf7acbae7c53c6c5e0cb9ca4 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 23 Oct 2025 17:06:35 +0200 Subject: [PATCH 299/867] dibs: Remove reset of static vars in dibs_init() 'clients' and 'max_client' are static variables and therefore don't need to be initialized. 
Reported-by: Mete Durlu Signed-off-by: Alexandra Winter Reviewed-by: Alexander Lobakin Reviewed-by: Dust Li Link: https://patch.msgid.link/20251023150636.3995476-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/dibs/dibs_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/dibs/dibs_main.c b/drivers/dibs/dibs_main.c index 0374f8350ff7f..b015578b4d2e3 100644 --- a/drivers/dibs/dibs_main.c +++ b/drivers/dibs/dibs_main.c @@ -254,9 +254,6 @@ static int __init dibs_init(void) { int rc; - memset(clients, 0, sizeof(clients)); - max_client = 0; - dibs_class = class_create("dibs"); if (IS_ERR(dibs_class)) return PTR_ERR(dibs_class); From 968822086b74dd0a3df693f9d179bd4fe508faf9 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 23 Oct 2025 17:06:36 +0200 Subject: [PATCH 300/867] dibs: Use subsys_initcall() In the case of built-in modules, the order of module_init() calls are derived from the Makefiles. Use subsys_initcall() for the dibs module, to make sure dibs_init() is executed before dibs clients like smc and dibs devices like ism are initialized. So future dibs client or dibs device modules can use module_init() without the risk of getting the order in the Makefiles wrong. 
Reported-by: Mete Durlu Signed-off-by: Alexandra Winter Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251023150636.3995476-2-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/dibs/dibs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dibs/dibs_main.c b/drivers/dibs/dibs_main.c index b015578b4d2e3..dac14d843af73 100644 --- a/drivers/dibs/dibs_main.c +++ b/drivers/dibs/dibs_main.c @@ -271,5 +271,5 @@ static void __exit dibs_exit(void) class_destroy(dibs_class); } -module_init(dibs_init); +subsys_initcall(dibs_init); module_exit(dibs_exit); From 7ceba45a6658ce637da334cd0ebf27f4ede6c0fe Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:37 +0200 Subject: [PATCH 301/867] wifi: cfg80211: add an hrtimer based delayed work item The normal timer mechanism assume that timeout further in the future need a lower accuracy. As an example, the granularity for a timer scheduled 4096 ms in the future on a 1000 Hz system is already 512 ms. This granularity is perfectly sufficient for e.g. timeouts, but there are other types of events that will happen at a future point in time and require a higher accuracy. Add a new wiphy_hrtimer_work type that uses an hrtimer internally. The API is almost identical to the existing wiphy_delayed_work and it can be used as a drop-in replacement after minor adjustments. The work will be scheduled relative to the current time with a slack of 1 millisecond. 
CC: stable@vger.kernel.org # 6.4+ Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.7f13a2adc5eb.I01b5af0363869864b0580d9c2a1770bafab69566@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 78 ++++++++++++++++++++++++++++++++++++++++++ net/wireless/core.c | 56 ++++++++++++++++++++++++++++++ net/wireless/trace.h | 21 ++++++++++++ 3 files changed, 155 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 781624f5913af..820e299f06b5e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). + * + * Note that these are scheduled with a timer where the accuracy + * becomes less the longer in the future the scheduled timer is. Use + * wiphy_hrtimer_work_queue() if the timer must be not be late by more + * than approximately 10 percent. 
*/ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, @@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, bool wiphy_delayed_work_pending(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +struct wiphy_hrtimer_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct hrtimer timer; +}; + +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t); + +static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork, + wiphy_work_func_t func) +{ + hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer, + CLOCK_BOOTTIME, HRTIMER_MODE_REL); + wiphy_work_init(&hrwork->work, func); +} + +/** + * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy + * @wiphy: the wiphy to queue for + * @hrwork: the high resolution timer worker + * @delay: the delay given as a ktime_t + * + * Please refer to wiphy_delayed_work_queue(). The difference is that + * the hrtimer work uses a high resolution timer for scheduling. This + * may be needed if timeouts might be scheduled further in the future + * and the accuracy of the normal timer is not sufficient. + * + * Expect a delay of a few milliseconds as the timer is scheduled + * with some slack and some more time may pass between queueing the + * work and its start. + */ +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay); + +/** + * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrtimer: the hrtimer work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrtimer); + +/** + * wiphy_hrtimer_work_flush - flush previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work to flush + * + * Flush the work (i.e. 
run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + +/** + * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer + * work item is currently pending. + * + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work in question + * + * Return: true if timer is pending, false otherwise + * + * Please refer to the wiphy_delayed_work_pending() documentation as + * this is the equivalent function for hrtimer based delayed work + * items. + */ +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + /** * enum ieee80211_ap_reg_power - regulatory power for an Access Point * diff --git a/net/wireless/core.c b/net/wireless/core.c index 797f9f2004a69..54a34d8d356e0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy, } EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending); +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t) +{ + struct wiphy_hrtimer_work *hrwork = + container_of(t, struct wiphy_hrtimer_work, timer); + + wiphy_work_queue(hrwork->wiphy, &hrwork->work); + + return HRTIMER_NORESTART; +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer); + +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay) +{ + trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay); + + if (!delay) { + hrtimer_cancel(&hrwork->timer); + wiphy_work_queue(wiphy, &hrwork->work); + return; + } + + hrwork->wiphy = wiphy; + hrtimer_start_range_ns(&hrwork->timer, delay, + 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue); + +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_cancel(wiphy, &hrwork->work); +} 
+EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel); + +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_flush(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush); + +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + return hrtimer_is_queued(&hrwork->timer); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8a4c34112eb5f..2b71f1d867a08 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue, __entry->delay) ); +TRACE_EVENT(wiphy_hrtimer_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, + ktime_t delay), + TP_ARGS(wiphy, work, delay), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + __field(ktime_t, delay) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work->func; + __entry->delay = delay; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu", + WIPHY_PR_ARG, __entry->instance, __entry->func, + __entry->delay) +); + TRACE_EVENT(wiphy_work_worker_start, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), From dfa865d490b1bd252045463588a91a4d3c82f3c8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:38 +0200 Subject: [PATCH 302/867] wifi: mac80211: use wiphy_hrtimer_work for ttlm_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. 
CC: stable@vger.kernel.org Fixes: 702e80470a33 ("wifi: mac80211: support handling of advertised TID-to-link mapping") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.83c2c611545e.I35498a6d883ea24b0dc4910cf521aa768d2a0e90@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 73fd86ec1bce4..eb22279c6e011 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -616,7 +616,7 @@ struct ieee80211_if_managed { u16 removed_links; /* TID-to-link mapping support */ - struct wiphy_delayed_work ttlm_work; + struct wiphy_hrtimer_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; struct wiphy_work teardown_ttlm_work; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b5827ea438ee..623a46b3214eb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -45,7 +45,7 @@ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) +#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS (100 * USEC_PER_MSEC) #define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 #define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5) @@ -4242,7 +4242,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, @@ -7095,7 +7095,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, /* if a planned TID-to-link mapping was cancelled - * abort it */ - 
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); } else if (sdata->u.mgd.ttlm_info.active) { /* if no TID-to-link element, set to default mapping in @@ -7130,7 +7130,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (ttlm_info.switch_time) { u16 beacon_ts_tu, st_tu, delay; - u32 delay_jiffies; + u64 delay_usec; u64 mask; /* The t2l map switch time is indicated with a partial @@ -7152,23 +7152,23 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW) return; - delay_jiffies = TU_TO_JIFFIES(delay); + delay_usec = ieee80211_tu_to_usec(delay); /* Link switching can take time, so schedule it * 100ms before to be ready on time */ - if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) - delay_jiffies -= + if (delay_usec > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) + delay_usec -= IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS; else - delay_jiffies = 0; + delay_usec = 0; sdata->u.mgd.ttlm_info = ttlm_info; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work, - delay_jiffies); + us_to_ktime(delay_usec)); return; } } @@ -8802,7 +8802,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); - wiphy_delayed_work_init(&ifmgd->ttlm_work, + wiphy_hrtimer_work_init(&ifmgd->ttlm_work, ieee80211_tid_to_link_map_work); wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work, ieee80211_neg_ttlm_timeout_work); From 3f654d53dff565095d83a84e3b6187526dadf4c8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:39 +0200 Subject: [PATCH 
303/867] wifi: mac80211: use wiphy_hrtimer_work for ml_reconf_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. CC: stable@vger.kernel.org Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.24a7b54e9e37.I063c5c15bf7672f94cea75f83e486a3ca52d098f@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb22279c6e011..eb38049b2252d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -612,7 +612,7 @@ struct ieee80211_if_managed { u8 *assoc_req_ies; size_t assoc_req_ies_len; - struct wiphy_delayed_work ml_reconf_work; + struct wiphy_hrtimer_work ml_reconf_work; u16 removed_links; /* TID-to-link mapping support */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 623a46b3214eb..f95bcf84ecc2b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4249,7 +4249,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, &ifmgd->neg_ttlm_timeout_work); sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); wiphy_work_cancel(sdata->local->hw.wiphy, @@ -6876,7 +6876,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, /* In case the removal was cancelled, abort it */ if (sdata->u.mgd.removed_links) { sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, 
&sdata->u.mgd.ml_reconf_work); } return; @@ -6906,9 +6906,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, } sdata->u.mgd.removed_links = removed_links; - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work, - TU_TO_JIFFIES(delay)); + us_to_ktime(ieee80211_tu_to_usec(delay))); } static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, @@ -8793,7 +8793,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_csa_connection_drop_work); wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); - wiphy_delayed_work_init(&ifmgd->ml_reconf_work, + wiphy_hrtimer_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); wiphy_delayed_work_init(&ifmgd->reconf.wk, ieee80211_ml_sta_reconf_timeout); From fbc1cc6973099f45e4c30b86f12b4435c7cb7d24 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:40 +0200 Subject: [PATCH 304/867] wifi: mac80211: use wiphy_hrtimer_work for csa.switch_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. To make this work, use the same clock to store the timestamp. 
CC: stable@vger.kernel.org Fixes: ec3252bff7b6 ("wifi: mac80211: use wiphy work for channel switch") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.68258c7e4ac4.I4ff2b2cdffbbf858bf5f08baccc7a88c4f9efe6f@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 2 +- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/link.c | 4 ++-- net/mac80211/mlme.c | 18 +++++++++--------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 57065714cf8ce..7f8799fd673eb 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1290,7 +1290,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) &link->csa.finalize_work); break; case NL80211_IFTYPE_STATION: - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb38049b2252d..878c3b14aeb80 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1017,10 +1017,10 @@ struct ieee80211_link_data_managed { bool operating_11g_mode; struct { - struct wiphy_delayed_work switch_work; + struct wiphy_hrtimer_work switch_work; struct cfg80211_chan_def ap_chandef; struct ieee80211_parsed_tpe tpe; - unsigned long time; + ktime_t time; bool waiting_bcn; bool ignored_same_chan; bool blocked_tx; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d71eabe5abf8d..4a19b765ccb69 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -472,10 +472,10 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, * from there. 
*/ if (link->conf->csa_active) - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - - jiffies); + ktime_get_boottime()); } for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f95bcf84ecc2b..f3138d1585353 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2594,7 +2594,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, return; } - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); } @@ -2753,7 +2753,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, .timestamp = timestamp, .device_timestamp = device_timestamp, }; - unsigned long now; + u32 csa_time_tu; + ktime_t now; int res; lockdep_assert_wiphy(local->hw.wiphy); @@ -2983,10 +2984,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.mode); /* we may have to handle timeout for deactivated link in software */ - now = jiffies; - link->u.mgd.csa.time = now + - TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * - link->conf->beacon_int); + now = ktime_get_boottime(); + csa_time_tu = (max_t(int, csa_ie.count, 1) - 1) * link->conf->beacon_int; + link->u.mgd.csa.time = now + us_to_ktime(ieee80211_tu_to_usec(csa_time_tu)); if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && local->ops->channel_switch) { @@ -3001,7 +3001,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* channel switch handled in software */ - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - now); return; @@ -8849,7 +8849,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - wiphy_delayed_work_init(&link->u.mgd.csa.switch_work, + 
wiphy_hrtimer_work_init(&link->u.mgd.csa.switch_work, ieee80211_csa_switch_work); ieee80211_clear_tpe(&link->conf->tpe); @@ -10064,7 +10064,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) &link->u.mgd.request_smps_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); - wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work); } From 37ff03b356ef9d2e960c07596e6fa9276198206d Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Sep 2025 11:34:22 +0300 Subject: [PATCH 305/867] wifi: iwlwifi: align the name of iwl_alive_ntf_v6 to the convention This struct represents versions 6 and 7. The convention is to name an API struct with the last version it represents, so rename to iwl_alive_ntf_v7. Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.81240e1d4df3.I2c1264a49b9f0fc160f960cf3c5dc4cedf6ceb6d@changeid --- drivers/net/wireless/intel/iwlwifi/fw/api/alive.h | 2 +- drivers/net/wireless/intel/iwlwifi/fw/api/commands.h | 2 +- drivers/net/wireless/intel/iwlwifi/mld/fw.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h index ad5b95cad0bf7..ea2ba4b4cb7b0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h @@ -88,7 +88,7 @@ struct iwl_imr_alive_info { __le32 enabled; } __packed; /* IMR_ALIVE_INFO_API_S_VER_1 */ -struct iwl_alive_ntf_v6 { +struct iwl_alive_ntf_v7 { __le16 status; __le16 flags; struct iwl_lmac_alive lmac_data[2]; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 997b0c9ce9840..8d64a271bb945 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ 
b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -60,7 +60,7 @@ enum iwl_legacy_cmds { * @UCODE_ALIVE_NTFY: * Alive data from the firmware, as described in * &struct iwl_alive_ntf_v3 or &struct iwl_alive_ntf_v4 or - * &struct iwl_alive_ntf_v5 or &struct iwl_alive_ntf_v6. + * &struct iwl_alive_ntf_v5 or &struct iwl_alive_ntf_v7. */ UCODE_ALIVE_NTFY = 0x1, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index b372173c4a795..bdb69c098fd14 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -126,7 +126,7 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, switch (version) { case 6: case 7: - expected_sz = sizeof(struct iwl_alive_ntf_v6); + expected_sz = sizeof(struct iwl_alive_ntf_v7); break; case 8: expected_sz = sizeof(struct iwl_alive_ntf); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 865f973f677db..6b76ce35443d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -115,7 +115,7 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, if (version >= 6) { - struct iwl_alive_ntf_v6 *palive; + struct iwl_alive_ntf_v7 *palive; if (pkt_len < sizeof(*palive)) return false; From 9dc6e9dfdf9d9efe5afd2322e450b83a449993db Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Sep 2025 11:34:23 +0300 Subject: [PATCH 306/867] wifi: iwlwifi: mld: remove support from of alive notif version 6 The last FW API that supports version 6 is 97. Since this API is no longer supported on any device that loads iwlmld, we can remove support of it. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.4923c981b0bf.Iff598c6d109fdbf0d5a5bab59d53485478ecc125@changeid --- drivers/net/wireless/intel/iwlwifi/mld/fw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index bdb69c098fd14..b3abfa1ec8102 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -124,7 +124,6 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, u16 status; switch (version) { - case 6: case 7: expected_sz = sizeof(struct iwl_alive_ntf_v7); break; @@ -188,9 +187,8 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, le32_to_cpu(umac->umac_major), le32_to_cpu(umac->umac_minor)); - if (version >= 7) - IWL_DEBUG_FW(mld, "FW alive flags 0x%x\n", - le16_to_cpu(palive->flags)); + IWL_DEBUG_FW(mld, "FW alive flags 0x%x\n", + le16_to_cpu(palive->flags)); if (version >= 8) IWL_DEBUG_FW(mld, "platform_id 0x%llx\n", From 5c0251598f29646a19a7232c9437b6faa22ab4c2 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Sep 2025 11:34:24 +0300 Subject: [PATCH 307/867] wifi: iwlwifi: mld: reschedule check_tpt_wk also not in EMLSR When the throughput count reaches the threshold, EMLSR is no longer blocked by throughput. This doesn't mean that EMLSR will be activated immediately, since there might be other reasons that block EMLSR. When the throughput blocker is not set, check_tpt_wk should run every 5 seconds and check if the throughput blocker should be set (if the throughput counter dropped). If not, it should reschedule itself. In the current code, the worker will reschedule itself only if we are in EMLSR. This is wrong, since we might be in a case where the throughput blocker is not set but we are not in EMLSR, and then we will never check again the throughput counters (and block EMLSR if needed). 
Fix this by rescheduling the worker also when EMLSR is not active. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.2a9cf2b2529d.I8284c0da9597e4c963e38ae133384f6f42044499@changeid --- drivers/net/wireless/intel/iwlwifi/mld/mlo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index 241a6271d13d6..fa04fbe06656a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -603,7 +603,7 @@ void iwl_mld_emlsr_check_tpt(struct wiphy *wiphy, struct wiphy_work *wk) /* EMLSR is not active */ if (sec_link_id == -1) - return; + goto schedule; IWL_DEBUG_INFO(mld, "Secondary Link %d: Tx MPDUs: %ld. Rx MPDUs: %ld\n", sec_link_id, sec_link_tx, sec_link_rx); @@ -625,6 +625,7 @@ void iwl_mld_emlsr_check_tpt(struct wiphy *wiphy, struct wiphy_work *wk) return; } +schedule: /* Check again when the next window ends */ wiphy_delayed_work_queue(mld_vif->mld->wiphy, &mld_vif->emlsr.check_tpt_wk, From eade5cacc95c3d8b2e88a694a6a0a42dc0dc1819 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2025 11:34:25 +0300 Subject: [PATCH 308/867] wifi: iwlwifi: mvm: move rate conversions to utils.c These aren't really related to rate scaling, they're just firmware API functions. Try to keep rs.c more for scaling and move these out. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.547129c7732e.I12c40876537722680d069b4bb5fc058206ba63d4@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 5 + drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 164 ------------------ drivers/net/wireless/intel/iwlwifi/mvm/rs.h | 3 - .../net/wireless/intel/iwlwifi/mvm/utils.c | 164 ++++++++++++++++++ 4 files changed, 169 insertions(+), 167 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b515028adc8f5..301d590fe0bd6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2894,4 +2894,9 @@ iwl_mvm_send_ap_tx_power_constraint_cmd(struct iwl_mvm *mvm, void iwl_mvm_smps_workaround(struct iwl_mvm *mvm, struct ieee80211_vif *vif, bool update); + +/* rate_n_flags conversion */ +u32 iwl_mvm_v3_rate_from_fw(__le32 rate, u8 rate_ver); +__le32 iwl_mvm_v3_rate_to_fw(u32 rate, u8 rate_ver); + #endif /* __IWL_MVM_H__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 5802ed80a9ca9..d1619a229d8f2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -4178,167 +4178,3 @@ int iwl_mvm_tx_protection(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, else return rs_drv_tx_protection(mvm, mvmsta, enable); } - -static u32 iwl_legacy_rate_to_fw_idx(u32 rate_n_flags) -{ - int rate = rate_n_flags & RATE_LEGACY_RATE_MSK_V1; - int idx; - bool ofdm = !(rate_n_flags & RATE_MCS_CCK_MSK_V1); - int offset = ofdm ? IWL_FIRST_OFDM_RATE : 0; - int last = ofdm ? 
IWL_RATE_COUNT_LEGACY : IWL_FIRST_OFDM_RATE; - - for (idx = offset; idx < last; idx++) - if (iwl_fw_rate_idx_to_plcp(idx) == rate) - return idx - offset; - return IWL_RATE_INVALID; -} - -u32 iwl_mvm_v3_rate_from_fw(__le32 rate, u8 rate_ver) -{ - u32 rate_v3 = 0, rate_v1; - u32 dup = 0; - - if (rate_ver > 1) - return iwl_v3_rate_from_v2_v3(rate, rate_ver >= 3); - - rate_v1 = le32_to_cpu(rate); - if (rate_v1 == 0) - return rate_v1; - /* convert rate */ - if (rate_v1 & RATE_MCS_HT_MSK_V1) { - u32 nss; - - rate_v3 |= RATE_MCS_MOD_TYPE_HT; - rate_v3 |= - rate_v1 & RATE_HT_MCS_RATE_CODE_MSK_V1; - nss = u32_get_bits(rate_v1, RATE_HT_MCS_MIMO2_MSK); - rate_v3 |= u32_encode_bits(nss, RATE_MCS_NSS_MSK); - } else if (rate_v1 & RATE_MCS_VHT_MSK_V1 || - rate_v1 & RATE_MCS_HE_MSK_V1) { - u32 nss = u32_get_bits(rate_v1, RATE_VHT_MCS_NSS_MSK); - - rate_v3 |= rate_v1 & RATE_VHT_MCS_RATE_CODE_MSK; - - rate_v3 |= u32_encode_bits(nss, RATE_MCS_NSS_MSK); - - if (rate_v1 & RATE_MCS_HE_MSK_V1) { - u32 he_type_bits = rate_v1 & RATE_MCS_HE_TYPE_MSK_V1; - u32 he_type = he_type_bits >> RATE_MCS_HE_TYPE_POS_V1; - u32 he_106t = (rate_v1 & RATE_MCS_HE_106T_MSK_V1) >> - RATE_MCS_HE_106T_POS_V1; - u32 he_gi_ltf = (rate_v1 & RATE_MCS_HE_GI_LTF_MSK_V1) >> - RATE_MCS_HE_GI_LTF_POS; - - if ((he_type_bits == RATE_MCS_HE_TYPE_SU || - he_type_bits == RATE_MCS_HE_TYPE_EXT_SU) && - he_gi_ltf == RATE_MCS_HE_SU_4_LTF) - /* the new rate have an additional bit to - * represent the value 4 rather then using SGI - * bit for this purpose - as it was done in the - * old rate - */ - he_gi_ltf += (rate_v1 & RATE_MCS_SGI_MSK_V1) >> - RATE_MCS_SGI_POS_V1; - - rate_v3 |= he_gi_ltf << RATE_MCS_HE_GI_LTF_POS; - rate_v3 |= he_type << RATE_MCS_HE_TYPE_POS; - rate_v3 |= he_106t << RATE_MCS_HE_106T_POS; - rate_v3 |= rate_v1 & RATE_HE_DUAL_CARRIER_MODE_MSK; - rate_v3 |= RATE_MCS_MOD_TYPE_HE; - } else { - rate_v3 |= RATE_MCS_MOD_TYPE_VHT; - } - /* if legacy format */ - } else { - u32 legacy_rate = 
iwl_legacy_rate_to_fw_idx(rate_v1); - - if (WARN_ON_ONCE(legacy_rate == IWL_RATE_INVALID)) - legacy_rate = (rate_v1 & RATE_MCS_CCK_MSK_V1) ? - IWL_FIRST_CCK_RATE : IWL_FIRST_OFDM_RATE; - - rate_v3 |= legacy_rate; - if (!(rate_v1 & RATE_MCS_CCK_MSK_V1)) - rate_v3 |= RATE_MCS_MOD_TYPE_LEGACY_OFDM; - } - - /* convert flags */ - if (rate_v1 & RATE_MCS_LDPC_MSK_V1) - rate_v3 |= RATE_MCS_LDPC_MSK; - rate_v3 |= (rate_v1 & RATE_MCS_CHAN_WIDTH_MSK_V1) | - (rate_v1 & RATE_MCS_ANT_AB_MSK) | - (rate_v1 & RATE_MCS_STBC_MSK) | - (rate_v1 & RATE_MCS_BF_MSK); - - dup = (rate_v1 & RATE_MCS_DUP_MSK_V1) >> RATE_MCS_DUP_POS_V1; - if (dup) { - rate_v3 |= RATE_MCS_DUP_MSK; - rate_v3 |= dup << RATE_MCS_CHAN_WIDTH_POS; - } - - if ((!(rate_v1 & RATE_MCS_HE_MSK_V1)) && - (rate_v1 & RATE_MCS_SGI_MSK_V1)) - rate_v3 |= RATE_MCS_SGI_MSK; - - return rate_v3; -} - -__le32 iwl_mvm_v3_rate_to_fw(u32 rate, u8 rate_ver) -{ - u32 result = 0; - int rate_idx; - - if (rate_ver > 1) - return iwl_v3_rate_to_v2_v3(rate, rate_ver > 2); - - switch (rate & RATE_MCS_MOD_TYPE_MSK) { - case RATE_MCS_MOD_TYPE_CCK: - result = RATE_MCS_CCK_MSK_V1; - fallthrough; - case RATE_MCS_MOD_TYPE_LEGACY_OFDM: - rate_idx = u32_get_bits(rate, RATE_LEGACY_RATE_MSK); - if (!(result & RATE_MCS_CCK_MSK_V1)) - rate_idx += IWL_FIRST_OFDM_RATE; - result |= u32_encode_bits(iwl_fw_rate_idx_to_plcp(rate_idx), - RATE_LEGACY_RATE_MSK_V1); - break; - case RATE_MCS_MOD_TYPE_HT: - result = RATE_MCS_HT_MSK_V1; - result |= u32_encode_bits(u32_get_bits(rate, - RATE_HT_MCS_CODE_MSK), - RATE_HT_MCS_RATE_CODE_MSK_V1); - result |= u32_encode_bits(u32_get_bits(rate, - RATE_MCS_NSS_MSK), - RATE_HT_MCS_MIMO2_MSK); - break; - case RATE_MCS_MOD_TYPE_VHT: - result = RATE_MCS_VHT_MSK_V1; - result |= u32_encode_bits(u32_get_bits(rate, - RATE_VHT_MCS_NSS_MSK), - RATE_MCS_CODE_MSK); - result |= u32_encode_bits(u32_get_bits(rate, RATE_MCS_NSS_MSK), - RATE_VHT_MCS_NSS_MSK); - break; - case RATE_MCS_MOD_TYPE_HE: /* not generated */ - default: - WARN_ONCE(1, "bad 
modulation type %d\n", - u32_get_bits(rate, RATE_MCS_MOD_TYPE_MSK)); - return 0; - } - - if (rate & RATE_MCS_LDPC_MSK) - result |= RATE_MCS_LDPC_MSK_V1; - WARN_ON_ONCE(u32_get_bits(rate, RATE_MCS_CHAN_WIDTH_MSK) > - RATE_MCS_CHAN_WIDTH_160_VAL); - result |= (rate & RATE_MCS_CHAN_WIDTH_MSK_V1) | - (rate & RATE_MCS_ANT_AB_MSK) | - (rate & RATE_MCS_STBC_MSK) | - (rate & RATE_MCS_BF_MSK); - - /* not handling DUP since we don't use it */ - WARN_ON_ONCE(rate & RATE_MCS_DUP_MSK); - - if (rate & RATE_MCS_SGI_MSK) - result |= RATE_MCS_SGI_MSK_V1; - - return cpu_to_le32(result); -} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h index dfb062b7c5c21..34c957bef6f8a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h @@ -425,9 +425,6 @@ void iwl_mvm_rate_control_unregister(void); struct iwl_mvm_sta; -u32 iwl_mvm_v3_rate_from_fw(__le32 rate, u8 rate_ver); -__le32 iwl_mvm_v3_rate_to_fw(u32 rate, u8 rate_ver); - int iwl_mvm_tx_protection(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool enable); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 22602c32faa5b..aa7af04d914eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1237,3 +1237,167 @@ bool iwl_mvm_vif_is_active(struct iwl_mvm_vif *mvmvif) return false; } + +static u32 iwl_legacy_rate_to_fw_idx(u32 rate_n_flags) +{ + int rate = rate_n_flags & RATE_LEGACY_RATE_MSK_V1; + int idx; + bool ofdm = !(rate_n_flags & RATE_MCS_CCK_MSK_V1); + int offset = ofdm ? IWL_FIRST_OFDM_RATE : 0; + int last = ofdm ? 
IWL_RATE_COUNT_LEGACY : IWL_FIRST_OFDM_RATE; + + for (idx = offset; idx < last; idx++) + if (iwl_fw_rate_idx_to_plcp(idx) == rate) + return idx - offset; + return IWL_RATE_INVALID; +} + +u32 iwl_mvm_v3_rate_from_fw(__le32 rate, u8 rate_ver) +{ + u32 rate_v3 = 0, rate_v1; + u32 dup = 0; + + if (rate_ver > 1) + return iwl_v3_rate_from_v2_v3(rate, rate_ver >= 3); + + rate_v1 = le32_to_cpu(rate); + if (rate_v1 == 0) + return rate_v1; + /* convert rate */ + if (rate_v1 & RATE_MCS_HT_MSK_V1) { + u32 nss; + + rate_v3 |= RATE_MCS_MOD_TYPE_HT; + rate_v3 |= + rate_v1 & RATE_HT_MCS_RATE_CODE_MSK_V1; + nss = u32_get_bits(rate_v1, RATE_HT_MCS_MIMO2_MSK); + rate_v3 |= u32_encode_bits(nss, RATE_MCS_NSS_MSK); + } else if (rate_v1 & RATE_MCS_VHT_MSK_V1 || + rate_v1 & RATE_MCS_HE_MSK_V1) { + u32 nss = u32_get_bits(rate_v1, RATE_VHT_MCS_NSS_MSK); + + rate_v3 |= rate_v1 & RATE_VHT_MCS_RATE_CODE_MSK; + + rate_v3 |= u32_encode_bits(nss, RATE_MCS_NSS_MSK); + + if (rate_v1 & RATE_MCS_HE_MSK_V1) { + u32 he_type_bits = rate_v1 & RATE_MCS_HE_TYPE_MSK_V1; + u32 he_type = he_type_bits >> RATE_MCS_HE_TYPE_POS_V1; + u32 he_106t = (rate_v1 & RATE_MCS_HE_106T_MSK_V1) >> + RATE_MCS_HE_106T_POS_V1; + u32 he_gi_ltf = (rate_v1 & RATE_MCS_HE_GI_LTF_MSK_V1) >> + RATE_MCS_HE_GI_LTF_POS; + + if ((he_type_bits == RATE_MCS_HE_TYPE_SU || + he_type_bits == RATE_MCS_HE_TYPE_EXT_SU) && + he_gi_ltf == RATE_MCS_HE_SU_4_LTF) + /* the new rate has an additional bit to + * represent the value 4 rather than using SGI + * bit for this purpose - as it was done in the + * old rate + */ + he_gi_ltf += (rate_v1 & RATE_MCS_SGI_MSK_V1) >> + RATE_MCS_SGI_POS_V1; + + rate_v3 |= he_gi_ltf << RATE_MCS_HE_GI_LTF_POS; + rate_v3 |= he_type << RATE_MCS_HE_TYPE_POS; + rate_v3 |= he_106t << RATE_MCS_HE_106T_POS; + rate_v3 |= rate_v1 & RATE_HE_DUAL_CARRIER_MODE_MSK; + rate_v3 |= RATE_MCS_MOD_TYPE_HE; + } else { + rate_v3 |= RATE_MCS_MOD_TYPE_VHT; + } + /* if legacy format */ + } else { + u32 legacy_rate =
iwl_legacy_rate_to_fw_idx(rate_v1); + + if (WARN_ON_ONCE(legacy_rate == IWL_RATE_INVALID)) + legacy_rate = (rate_v1 & RATE_MCS_CCK_MSK_V1) ? + IWL_FIRST_CCK_RATE : IWL_FIRST_OFDM_RATE; + + rate_v3 |= legacy_rate; + if (!(rate_v1 & RATE_MCS_CCK_MSK_V1)) + rate_v3 |= RATE_MCS_MOD_TYPE_LEGACY_OFDM; + } + + /* convert flags */ + if (rate_v1 & RATE_MCS_LDPC_MSK_V1) + rate_v3 |= RATE_MCS_LDPC_MSK; + rate_v3 |= (rate_v1 & RATE_MCS_CHAN_WIDTH_MSK_V1) | + (rate_v1 & RATE_MCS_ANT_AB_MSK) | + (rate_v1 & RATE_MCS_STBC_MSK) | + (rate_v1 & RATE_MCS_BF_MSK); + + dup = (rate_v1 & RATE_MCS_DUP_MSK_V1) >> RATE_MCS_DUP_POS_V1; + if (dup) { + rate_v3 |= RATE_MCS_DUP_MSK; + rate_v3 |= dup << RATE_MCS_CHAN_WIDTH_POS; + } + + if ((!(rate_v1 & RATE_MCS_HE_MSK_V1)) && + (rate_v1 & RATE_MCS_SGI_MSK_V1)) + rate_v3 |= RATE_MCS_SGI_MSK; + + return rate_v3; +} + +__le32 iwl_mvm_v3_rate_to_fw(u32 rate, u8 rate_ver) +{ + u32 result = 0; + int rate_idx; + + if (rate_ver > 1) + return iwl_v3_rate_to_v2_v3(rate, rate_ver > 2); + + switch (rate & RATE_MCS_MOD_TYPE_MSK) { + case RATE_MCS_MOD_TYPE_CCK: + result = RATE_MCS_CCK_MSK_V1; + fallthrough; + case RATE_MCS_MOD_TYPE_LEGACY_OFDM: + rate_idx = u32_get_bits(rate, RATE_LEGACY_RATE_MSK); + if (!(result & RATE_MCS_CCK_MSK_V1)) + rate_idx += IWL_FIRST_OFDM_RATE; + result |= u32_encode_bits(iwl_fw_rate_idx_to_plcp(rate_idx), + RATE_LEGACY_RATE_MSK_V1); + break; + case RATE_MCS_MOD_TYPE_HT: + result = RATE_MCS_HT_MSK_V1; + result |= u32_encode_bits(u32_get_bits(rate, + RATE_HT_MCS_CODE_MSK), + RATE_HT_MCS_RATE_CODE_MSK_V1); + result |= u32_encode_bits(u32_get_bits(rate, + RATE_MCS_NSS_MSK), + RATE_HT_MCS_MIMO2_MSK); + break; + case RATE_MCS_MOD_TYPE_VHT: + result = RATE_MCS_VHT_MSK_V1; + result |= u32_encode_bits(u32_get_bits(rate, + RATE_VHT_MCS_NSS_MSK), + RATE_MCS_CODE_MSK); + result |= u32_encode_bits(u32_get_bits(rate, RATE_MCS_NSS_MSK), + RATE_VHT_MCS_NSS_MSK); + break; + case RATE_MCS_MOD_TYPE_HE: /* not generated */ + default: + WARN_ONCE(1, "bad 
modulation type %d\n", + u32_get_bits(rate, RATE_MCS_MOD_TYPE_MSK)); + return 0; + } + + if (rate & RATE_MCS_LDPC_MSK) + result |= RATE_MCS_LDPC_MSK_V1; + WARN_ON_ONCE(u32_get_bits(rate, RATE_MCS_CHAN_WIDTH_MSK) > + RATE_MCS_CHAN_WIDTH_160_VAL); + result |= (rate & RATE_MCS_CHAN_WIDTH_MSK_V1) | + (rate & RATE_MCS_ANT_AB_MSK) | + (rate & RATE_MCS_STBC_MSK) | + (rate & RATE_MCS_BF_MSK); + + /* not handling DUP since we don't use it */ + WARN_ON_ONCE(rate & RATE_MCS_DUP_MSK); + + if (rate & RATE_MCS_SGI_MSK) + result |= RATE_MCS_SGI_MSK_V1; + + return cpu_to_le32(result); +} From ba85816979360b78a40a3dd0cb8e6f57d6b22456 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Sep 2025 11:34:26 +0300 Subject: [PATCH 309/867] wifi: iwlwifi: iwlmld is always used for wifi7 devices iwlmld is used since API 97 and for wifi7 devices. Since APIs < 97 are no longer supported on such devices, we can remove the API check and always load iwlmld for the wifi7 devices. Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.45ab33fcdc00.Ia3a40b687b75c872cf7e7a19331a014bccf5f2d6@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 607fcea6f4efc..7d58e294618d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -179,7 +179,8 @@ static inline char iwl_drv_get_step(int step) static bool iwl_drv_is_wifi7_supported(struct iwl_trans *trans) { - return CSR_HW_RFID_TYPE(trans->info.hw_rf_id) >= IWL_CFG_RF_TYPE_FM; + return trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && + CSR_HW_RFID_TYPE(trans->info.hw_rf_id) >= IWL_CFG_RF_TYPE_FM; } const char *iwl_drv_get_fwname_pre(struct iwl_trans *trans, char *buf) @@ -427,7 +428,6 @@ struct iwl_firmware_pieces { size_t 
dbg_trigger_tlv_len[FW_DBG_TRIGGER_MAX]; struct iwl_fw_dbg_mem_seg_tlv *dbg_mem_tlv; size_t n_mem_tlv; - u32 major; }; static void alloc_sec_data(struct iwl_firmware_pieces *pieces, @@ -1069,19 +1069,19 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, break; case IWL_UCODE_TLV_FW_VERSION: { const __le32 *ptr = (const void *)tlv_data; - u32 minor; + u32 major, minor; u8 local_comp; if (tlv_len != sizeof(u32) * 3) goto invalid_tlv_len; - pieces->major = le32_to_cpup(ptr++); + major = le32_to_cpup(ptr++); minor = le32_to_cpup(ptr++); local_comp = le32_to_cpup(ptr); snprintf(drv->fw.fw_version, sizeof(drv->fw.fw_version), - "%u.%08x.%u %s", pieces->major, minor, + "%u.%08x.%u %s", major, minor, local_comp, iwl_reduced_fw_name(drv)); break; } @@ -1589,8 +1589,6 @@ static void _iwl_op_mode_stop(struct iwl_drv *drv) } } -#define IWL_MLD_SUPPORTED_FW_VERSION 97 - /* * iwl_req_fw_callback - callback when firmware was loaded * @@ -1859,12 +1857,10 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) } #if IS_ENABLED(CONFIG_IWLMLD) - if (pieces->major >= IWL_MLD_SUPPORTED_FW_VERSION && - iwl_drv_is_wifi7_supported(drv->trans)) + if (iwl_drv_is_wifi7_supported(drv->trans)) op = &iwlwifi_opmode_table[MLD_OP_MODE]; #else - if (pieces->major >= IWL_MLD_SUPPORTED_FW_VERSION && - iwl_drv_is_wifi7_supported(drv->trans)) { + if (iwl_drv_is_wifi7_supported(drv->trans)) { IWL_ERR(drv, "IWLMLD needs to be compiled to support this firmware\n"); mutex_unlock(&iwlwifi_opmode_table_mtx); From 30d47d8fe781469ebd4e38240999767f139effb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2025 11:34:27 +0300 Subject: [PATCH 310/867] wifi: iwlwifi: cfg: add new device names Add a couple of device names so that these new devices will be shown correctly. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.1cbc0251532f.I6183a6a08a7998e598042a50c7d7a6b82f9fa58e@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c | 1 + drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c b/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c index 97735175cb0ea..b8c6b06e70991 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c @@ -13,3 +13,4 @@ const char iwl_killer_be1775i_name[] = const char iwl_be211_name[] = "Intel(R) Wi-Fi 7 BE211 320MHz"; const char iwl_be213_name[] = "Intel(R) Wi-Fi 7 BE213 160MHz"; +const char iwl_ax221_name[] = "Intel(R) Wi-Fi 6E AX221 160MHz"; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index a607e7ab914ba..cfd1629f00163 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -688,6 +688,7 @@ extern const char iwl_killer_bn1850i_name[]; extern const char iwl_bn201_name[]; extern const char iwl_be221_name[]; extern const char iwl_be223_name[]; +extern const char iwl_ax221_name[]; #if IS_ENABLED(CONFIG_IWLDVM) extern const struct iwl_rf_cfg iwl5300_agn_cfg; extern const struct iwl_rf_cfg iwl5350_agn_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index b21a4d8eb1051..de04a84def0d8 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1061,11 +1061,14 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { /* WH RF */ IWL_DEV_INFO(iwl_rf_wh, iwl_be211_name, RF_TYPE(WH)), + IWL_DEV_INFO(iwl_rf_wh, iwl_ax221_name, RF_TYPE(WH), SUBDEV(0x0514)), + IWL_DEV_INFO(iwl_rf_wh, 
iwl_ax221_name, RF_TYPE(WH), SUBDEV(0x4514)), IWL_DEV_INFO(iwl_rf_wh_160mhz, iwl_be213_name, RF_TYPE(WH), BW_LIMITED), /* PE RF */ IWL_DEV_INFO(iwl_rf_pe, iwl_bn201_name, RF_TYPE(PE)), IWL_DEV_INFO(iwl_rf_pe, iwl_be223_name, RF_TYPE(PE), SUBDEV(0x0524)), + IWL_DEV_INFO(iwl_rf_pe, iwl_be223_name, RF_TYPE(PE), SUBDEV(0x4524)), IWL_DEV_INFO(iwl_rf_pe, iwl_be221_name, RF_TYPE(PE), SUBDEV(0x0324)), /* Killer */ From ef56bbed4c03fc272c65da821d31094b63a8461e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2025 11:34:28 +0300 Subject: [PATCH 311/867] wifi: iwlwifi: tests: check listed PCI IDs have configs Add a test that checks, for the old pre-CNVI devices, that PCI IDs listed in the PCI IDs table will also match in the config table. Newer ones we test against our database of devices, but the current database doesn't go back that far, so at least this checks against the PCI IDs the driver has. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.eb728b270d46.Ie5754f4201b807eb0d55feb339a728fc0b42e8bf@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 1 + .../wireless/intel/iwlwifi/tests/devinfo.c | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 3e6206e739f65..18c9244ee8efd 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -90,6 +90,7 @@ const struct iwl_mac_cfg iwl_bz_mac_cfg = { .low_latency_xtal = true, .ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US, }; +EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_bz_mac_cfg); const struct iwl_mac_cfg iwl_gl_mac_cfg = { .device_family = IWL_DEVICE_FAMILY_BZ, diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c index c31bbd4e7a4ae..3054bc0a9c679 100644 --- a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c +++ 
b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c @@ -265,6 +265,32 @@ static void devinfo_api_range(struct kunit *test) } } +static void devinfo_pci_ids_config(struct kunit *test) +{ + for (int i = 0; iwl_hw_card_ids[i].vendor; i++) { + const struct pci_device_id *s = &iwl_hw_card_ids[i]; + const struct iwl_dev_info *di; + + if (s->device == PCI_ANY_ID || s->subdevice == PCI_ANY_ID) + continue; + + /* + * The check below only works for old (pre-CNVI) devices. Most + * new ones have subdevice==ANY, so are already skipped, but for some + * Bz platform(s) we list all the RF PCI IDs. Skip those too. + */ + if (s->driver_data == (kernel_ulong_t)&iwl_bz_mac_cfg) + continue; + + di = iwl_pci_find_dev_info(s->device, s->subdevice, + 0, 0, 0, 0, true); + + KUNIT_EXPECT_PTR_NE_MSG(test, di, NULL, + "PCI ID %04x:%04x not found\n", + s->device, s->subdevice); + } +} + static struct kunit_case devinfo_test_cases[] = { KUNIT_CASE(devinfo_table_order), KUNIT_CASE(devinfo_discrete_match), @@ -276,6 +302,7 @@ static struct kunit_case devinfo_test_cases[] = { KUNIT_CASE(devinfo_pci_ids), KUNIT_CASE(devinfo_no_mac_cfg_dups), KUNIT_CASE(devinfo_api_range), + KUNIT_CASE(devinfo_pci_ids_config), {} }; From 58a4ebe3168813a04bef08f7858a63b199e866e1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2025 11:34:29 +0300 Subject: [PATCH 312/867] wifi: iwlwifi: fix remaining kernel-doc warnings Fix the remaining kernel-doc warnings across the driver.
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.f94b6d4ef142.I91007eed4cf37125ca7a012f2021615b4fa9eb66@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 1 + .../wireless/intel/iwlwifi/fw/api/cmdhdr.h | 4 +- .../net/wireless/intel/iwlwifi/fw/api/coex.h | 4 +- .../wireless/intel/iwlwifi/fw/api/dbg-tlv.h | 14 ++-- .../net/wireless/intel/iwlwifi/fw/api/debug.h | 2 +- .../wireless/intel/iwlwifi/fw/api/location.h | 8 +- .../net/wireless/intel/iwlwifi/fw/api/power.h | 5 +- .../net/wireless/intel/iwlwifi/fw/api/scan.h | 78 ++++++++++--------- .../net/wireless/intel/iwlwifi/fw/api/sta.h | 6 +- .../net/wireless/intel/iwlwifi/fw/api/stats.h | 39 +++++----- .../net/wireless/intel/iwlwifi/fw/api/tx.h | 2 +- .../wireless/intel/iwlwifi/fw/error-dump.h | 4 +- drivers/net/wireless/intel/iwlwifi/fw/file.h | 74 +++++++++++++----- drivers/net/wireless/intel/iwlwifi/fw/img.h | 12 ++- .../net/wireless/intel/iwlwifi/fw/runtime.h | 22 +++++- .../net/wireless/intel/iwlwifi/iwl-config.h | 2 +- .../net/wireless/intel/iwlwifi/iwl-dbg-tlv.h | 4 +- drivers/net/wireless/intel/iwlwifi/iwl-drv.h | 3 +- .../wireless/intel/iwlwifi/iwl-modparams.h | 4 +- .../wireless/intel/iwlwifi/iwl-nvm-parse.h | 17 ++-- .../net/wireless/intel/iwlwifi/iwl-op-mode.h | 1 + .../net/wireless/intel/iwlwifi/iwl-trans.h | 6 +- 22 files changed, 194 insertions(+), 118 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 20bc6671f4eb2..06cece4ea6d95 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -151,6 +151,7 @@ union acpi_object *iwl_acpi_get_dsm_object(struct device *dev, int rev, * @mcc: output buffer (3 bytes) that will get the MCC * * This function tries to read the current MCC from ACPI if available. 
+ * Return: 0 on success, or a negative error code */ int iwl_acpi_get_mcc(struct iwl_fw_runtime *fwrt, char *mcc); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h b/drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h index d130d4f854447..073f003bdc5d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014 Intel Corporation + * Copyright (C) 2005-2014, 2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -98,7 +98,7 @@ struct iwl_cmd_header { } __packed; /** - * struct iwl_cmd_header_wide + * struct iwl_cmd_header_wide - wide command header * * This header format appears in the beginning of each command sent from the * driver, and each response/notification received from uCode. diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h index ddc84430d8959..616f00a8b6034 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * Copyright (C) 2013-2014, 2018-2019 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH @@ -52,7 +52,7 @@ struct iwl_bt_coex_cmd { } __packed; /* BT_COEX_CMD_API_S_VER_6 */ /** - * struct iwl_bt_coex_reduced_txp_update_cmd + * struct iwl_bt_coex_reduced_txp_update_cmd - reduced TX power command * @reduced_txp: bit BT_REDUCED_TX_POWER_BIT to enable / disable, rest of the * bits are the sta_id (value) */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h index 
3173fa96cb489..b62f0687327a6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h @@ -16,7 +16,7 @@ #define IWL_FW_INI_PRESET_DISABLE 0xff /** - * struct iwl_fw_ini_hcmd + * struct iwl_fw_ini_hcmd - debug configuration host command * * @id: the debug configuration command type for instance: 0xf6 / 0xf5 / DHC * @group: the desired cmd group @@ -199,7 +199,7 @@ struct iwl_fw_ini_region_tlv { } __packed; /* FW_TLV_DEBUG_REGION_API_S_VER_1 */ /** - * struct iwl_fw_ini_debug_info_tlv + * struct iwl_fw_ini_debug_info_tlv - debug info TLV * * debug configuration name for a specific image * @@ -311,7 +311,7 @@ struct iwl_fw_ini_conf_set_tlv { } __packed; /* FW_TLV_DEBUG_CONFIG_SET_API_S_VER_1 */ /** - * enum iwl_fw_ini_config_set_type + * enum iwl_fw_ini_config_set_type - configuration set type * * @IWL_FW_INI_CONFIG_SET_TYPE_INVALID: invalid config set * @IWL_FW_INI_CONFIG_SET_TYPE_DEVICE_PERIPHERY_MAC: for PERIPHERY MAC configuration @@ -337,7 +337,7 @@ enum iwl_fw_ini_config_set_type { } __packed; /** - * enum iwl_fw_ini_allocation_id + * enum iwl_fw_ini_allocation_id - allocation ID * * @IWL_FW_INI_ALLOCATION_INVALID: invalid * @IWL_FW_INI_ALLOCATION_ID_DBGC1: allocation meant for DBGC1 configuration @@ -356,7 +356,7 @@ enum iwl_fw_ini_allocation_id { }; /* FW_DEBUG_TLV_ALLOCATION_ID_E_VER_1 */ /** - * enum iwl_fw_ini_buffer_location + * enum iwl_fw_ini_buffer_location - buffer location * * @IWL_FW_INI_LOCATION_INVALID: invalid * @IWL_FW_INI_LOCATION_SRAM_PATH: SRAM location @@ -373,7 +373,7 @@ enum iwl_fw_ini_buffer_location { }; /* FW_DEBUG_TLV_BUFFER_LOCATION_E_VER_1 */ /** - * enum iwl_fw_ini_region_type + * enum iwl_fw_ini_region_type - region type * * @IWL_FW_INI_REGION_INVALID: invalid * @IWL_FW_INI_REGION_TLV: uCode and debug TLVs @@ -437,7 +437,7 @@ enum iwl_fw_ini_region_device_memory_subtype { }; /* FW_TLV_DEBUG_REGION_DEVICE_MEMORY_SUBTYPE_API_E */ /** - * enum iwl_fw_ini_time_point + * 
enum iwl_fw_ini_time_point - time point type * * Hard coded time points in which the driver can send hcmd or perform dump * collection diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h index 0cf1e5124fba7..61a850de26fcc 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h @@ -421,7 +421,7 @@ struct iwl_dbgc1_info { } __packed; /* INIT_DRAM_FRAGS_ALLOCATIONS_S_VER_1 */ /** - * struct iwl_dbg_host_event_cfg_cmd + * struct iwl_dbg_host_event_cfg_cmd - host event config command * @enabled_severities: enabled severities */ struct iwl_dbg_host_event_cfg_cmd { diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h index 33541f92c7c73..2ee3a48aa5dfe 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h @@ -1092,7 +1092,7 @@ struct iwl_tof_range_req_ap_entry { } __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_9 */ /** - * enum iwl_tof_response_mode + * enum iwl_tof_response_mode - TOF response mode * @IWL_MVM_TOF_RESPONSE_ASAP: report each AP measurement separately as soon as * possible (not supported for this release) * @IWL_MVM_TOF_RESPONSE_TIMEOUT: report all AP measurements as a batch upon @@ -1108,7 +1108,7 @@ enum iwl_tof_response_mode { }; /** - * enum iwl_tof_initiator_flags + * enum iwl_tof_initiator_flags - TOF initiator flags * * @IWL_TOF_INITIATOR_FLAGS_FAST_ALGO_DISABLED: disable fast algo, meaning run * the algo on ant A+B, instead of only one of them. @@ -1409,7 +1409,7 @@ enum iwl_tof_range_request_status { }; /** - * enum iwl_tof_entry_status + * enum iwl_tof_entry_status - TOF entry status * * @IWL_TOF_ENTRY_SUCCESS: successful measurement. * @IWL_TOF_ENTRY_GENERAL_FAILURE: General failure. 
@@ -1856,7 +1856,7 @@ struct iwl_tof_mcsi_notif { } __packed; /** - * struct iwl_tof_range_abort_cmd + * struct iwl_tof_range_abort_cmd - TOF range abort command * @request_id: corresponds to a range request * @reserved: reserved */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h index 5eb8d10678fd4..535864e226260 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h @@ -620,7 +620,7 @@ struct iwl_sar_offset_mapping_cmd { } __packed; /*SAR_OFFSET_MAPPING_TABLE_CMD_API_S*/ /** - * struct iwl_beacon_filter_cmd + * struct iwl_beacon_filter_cmd - beacon filter command * REPLY_BEACON_FILTERING_CMD = 0xd2 (command) * @bf_energy_delta: Used for RSSI filtering, if in 'normal' state. Send beacon * to driver if delta in Energy values calculated for this and last @@ -762,7 +762,7 @@ enum iwl_6ghz_ap_type { }; /* PHY_AP_TYPE_API_E_VER_1 */ /** - * struct iwl_txpower_constraints_cmd + * struct iwl_txpower_constraints_cmd - TX power constraints command * AP_TX_POWER_CONSTRAINTS_CMD * Used for VLP/LPI/AFC Access Point power constraints for 6GHz channels * @link_id: linkId @@ -786,4 +786,5 @@ struct iwl_txpower_constraints_cmd { __s8 psd_pwr[IWL_MAX_TX_EIRP_PSD_PWR_MAX_SIZE]; u8 reserved[3]; } __packed; /* PHY_AP_TX_POWER_CONSTRAINTS_CMD_API_S_VER_1 */ + #endif /* __iwl_fw_api_power_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index f486d624500be..60f0a4924ddfb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ 
-129,7 +129,7 @@ struct iwl_scan_offload_profile { } __packed; /** - * struct iwl_scan_offload_profile_cfg_data + * struct iwl_scan_offload_profile_cfg_data - scan offload profile configs * @blocklist_len: length of blocklist * @num_profiles: num of profiles in the list * @match_notify: clients waiting for match found notification @@ -159,7 +159,7 @@ struct iwl_scan_offload_profile_cfg_v1 { } __packed; /* SCAN_OFFLOAD_PROFILES_CFG_API_S_VER_1-2*/ /** - * struct iwl_scan_offload_profile_cfg + * struct iwl_scan_offload_profile_cfg - scan offload profile config * @profiles: profiles to search for match * @data: the rest of the data for profile_cfg */ @@ -507,7 +507,7 @@ enum iwl_uhb_chan_cfg_flags { IWL_UHB_CHAN_CFG_FLAG_FORCE_PASSIVE = BIT(26), }; /** - * struct iwl_scan_dwell + * struct iwl_scan_dwell - scan dwell configuration * @active: default dwell time for active scan * @passive: default dwell time for passive scan * @fragmented: default dwell time for fragmented scan @@ -728,7 +728,7 @@ enum iwl_umac_scan_general_params_flags2 { }; /** - * struct iwl_scan_channel_cfg_umac + * struct iwl_scan_channel_cfg_umac - scan channel config * @flags: bitmap - 0-19: directed scan to i'th ssid. * @channel_num: channel number 1-13 etc. 
* @v1: command version 1 @@ -774,7 +774,7 @@ struct iwl_scan_channel_cfg_umac { } __packed; /** - * struct iwl_scan_umac_schedule + * struct iwl_scan_umac_schedule - scan schedule parameters * @interval: interval in seconds between scan iterations * @iter_count: num of scan iterations for schedule plan, 0xff for infinite loop * @reserved: for alignment and future use @@ -815,7 +815,7 @@ struct iwl_scan_req_umac_tail_v2 { } __packed; /** - * struct iwl_scan_umac_chan_param + * struct iwl_scan_umac_chan_param - scan channel parameters * @flags: channel flags &enum iwl_scan_channel_flags * @count: num of channels in scan request * @reserved: for future use and alignment @@ -827,33 +827,37 @@ struct iwl_scan_umac_chan_param { } __packed; /*SCAN_CHANNEL_PARAMS_API_S_VER_1 */ /** - * struct iwl_scan_req_umac + * struct iwl_scan_req_umac - scan request command * @flags: &enum iwl_umac_scan_flags * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @ooc_priority: out of channel priority - &enum iwl_scan_priority * @general_flags: &enum iwl_umac_scan_general_flags + * @reserved: reserved * @scan_start_mac_id: report the scan start TSF time according to this mac TSF - * @extended_dwell: dwell time for channels 1, 6 and 11 - * @active_dwell: dwell time for active scan per LMAC - * @passive_dwell: dwell time for passive scan per LMAC - * @fragmented_dwell: dwell time for fragmented passive scan - * @adwell_default_n_aps: for adaptive dwell the default number of APs + * @v1: version 1 command data + * @v6: version 6 command data + * @v7: version 7 command data + * @v8: version 8 command data + * @v9: version 9 command data + * @v1.extended_dwell: dwell time for channels 1, 6 and 11 + * @v1.active_dwell: dwell time for active scan per LMAC + * @v1.passive_dwell: dwell time for passive scan per LMAC + * @v1.fragmented_dwell: dwell time for fragmented passive scan + * @v7.adwell_default_n_aps: for adaptive dwell the default number of APs * per channel - * 
@adwell_default_n_aps_social: for adaptive dwell the default + * @v7.adwell_default_n_aps_social: for adaptive dwell the default * number of APs per social (1,6,11) channel - * @general_flags2: &enum iwl_umac_scan_general_flags2 - * @adwell_max_budget: for adaptive dwell the maximal budget of TU to be added - * to total scan time - * @max_out_time: max out of serving channel time, per LMAC - for CDB there - * are 2 LMACs - * @suspend_time: max suspend time, per LMAC - for CDB there are 2 LMACs - * @scan_priority: scan internal prioritization &enum iwl_scan_priority - * @num_of_fragments: Number of fragments needed for full coverage per band. + * @v8.general_flags2: &enum iwl_umac_scan_general_flags2 + * @v7.adwell_max_budget: for adaptive dwell the maximal budget of TU to be + * added to total scan time + * @v1.max_out_time: max out of serving channel time, per LMAC - for CDB + * there are 2 LMACs + * @v1.suspend_time: max suspend time, per LMAC - for CDB there are 2 LMACs + * @v1.scan_priority: scan internal prioritization &enum iwl_scan_priority + * @v8.num_of_fragments: Number of fragments needed for full coverage per band. * Relevant only for fragmented scan. 
- * @channel: &struct iwl_scan_umac_chan_param - * @reserved: for future use and alignment - * @reserved3: for future use and alignment - * @data: &struct iwl_scan_channel_cfg_umac and + * @v1.channel: &struct iwl_scan_umac_chan_param + * @v1.data: &struct iwl_scan_channel_cfg_umac and * &struct iwl_scan_req_umac_tail */ struct iwl_scan_req_umac { @@ -939,7 +943,7 @@ struct iwl_scan_req_umac { #define IWL_SCAN_REQ_UMAC_SIZE_V1 36 /** - * struct iwl_scan_probe_params_v3 + * struct iwl_scan_probe_params_v3 - scan probe parameters * @preq: scan probe request params * @ssid_num: number of valid SSIDs in direct scan array * @short_ssid_num: number of valid short SSIDs in short ssid array @@ -961,7 +965,7 @@ struct iwl_scan_probe_params_v3 { } __packed; /* SCAN_PROBE_PARAMS_API_S_VER_3 */ /** - * struct iwl_scan_probe_params_v4 + * struct iwl_scan_probe_params_v4 - scan probe parameters * @preq: scan probe request params * @short_ssid_num: number of valid short SSIDs in short ssid array * @bssid_num: number of valid bssid in bssids array @@ -983,7 +987,7 @@ struct iwl_scan_probe_params_v4 { #define SCAN_MAX_NUM_CHANS_V3 67 /** - * struct iwl_scan_channel_params_v4 + * struct iwl_scan_channel_params_v4 - channel params * @flags: channel flags &enum iwl_scan_channel_flags * @count: num of channels in scan request * @num_of_aps_override: override the number of APs the FW uses to calculate @@ -1006,7 +1010,7 @@ struct iwl_scan_channel_params_v4 { SCAN_CHANNEL_PARAMS_API_S_VER_5 */ /** - * struct iwl_scan_channel_params_v7 + * struct iwl_scan_channel_params_v7 - channel params * @flags: channel flags &enum iwl_scan_channel_flags * @count: num of channels in scan request * @n_aps_override: override the number of APs the FW uses to calculate dwell @@ -1024,7 +1028,7 @@ struct iwl_scan_channel_params_v7 { } __packed; /* SCAN_CHANNEL_PARAMS_API_S_VER_6 */ /** - * struct iwl_scan_general_params_v11 + * struct iwl_scan_general_params_v11 - general params * @flags: &enum
iwl_umac_scan_general_flags_v2 * @reserved: reserved for future * @scan_start_mac_or_link_id: report the scan start TSF time according to this @@ -1066,7 +1070,7 @@ struct iwl_scan_general_params_v11 { } __packed; /* SCAN_GENERAL_PARAMS_API_S_VER_12, *_VER_11 and *_VER_10 */ /** - * struct iwl_scan_periodic_parms_v1 + * struct iwl_scan_periodic_parms_v1 - periodicity parameters * @schedule: can scheduling parameter * @delay: initial delay of the periodic scan in seconds * @reserved: reserved for future @@ -1078,7 +1082,7 @@ struct iwl_scan_periodic_parms_v1 { } __packed; /* SCAN_PERIODIC_PARAMS_API_S_VER_1 */ /** - * struct iwl_scan_req_params_v12 + * struct iwl_scan_req_params_v12 - scan request parameters (v12) * @general_params: &struct iwl_scan_general_params_v11 * @channel_params: &struct iwl_scan_channel_params_v4 * @periodic_params: &struct iwl_scan_periodic_parms_v1 @@ -1106,7 +1110,7 @@ struct iwl_scan_req_params_v17 { } __packed; /* SCAN_REQUEST_PARAMS_API_S_VER_17 - 14 */ /** - * struct iwl_scan_req_umac_v12 + * struct iwl_scan_req_umac_v12 - scan request command (v12) * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @ooc_priority: out of channel priority - &enum iwl_scan_priority * @scan_params: scan parameters @@ -1130,7 +1134,7 @@ struct iwl_scan_req_umac_v17 { } __packed; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_17 - 14 */ /** - * struct iwl_umac_scan_abort + * struct iwl_umac_scan_abort - scan abort command * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @flags: reserved */ @@ -1140,7 +1144,7 @@ struct iwl_umac_scan_abort { } __packed; /* SCAN_ABORT_CMD_UMAC_API_S_VER_1 */ /** - * enum iwl_umac_scan_abort_status + * enum iwl_umac_scan_abort_status - scan abort status * * @IWL_UMAC_SCAN_ABORT_STATUS_SUCCESS: scan was successfully aborted * @IWL_UMAC_SCAN_ABORT_STATUS_IN_PROGRESS: scan abort is in progress @@ -1153,7 +1157,7 @@ enum iwl_umac_scan_abort_status { }; /** - * struct iwl_umac_scan_complete + * struct iwl_umac_scan_complete - scan complete 
notification * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @last_schedule: last scheduling line * @last_iter: last scan iteration number diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h index ecbcd5084cd8d..e6f9abdfa546d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2021, 2023 Intel Corporation + * Copyright (C) 2012-2014, 2018-2021, 2023, 2025 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -428,7 +428,7 @@ struct iwl_mvm_rm_sta_cmd { } __packed; /* REMOVE_STA_CMD_API_S_VER_2 */ /** - * struct iwl_mvm_mgmt_mcast_key_cmd_v1 + * struct iwl_mvm_mgmt_mcast_key_cmd_v1 - IGTK command * ( MGMT_MCAST_KEY = 0x1f ) * @ctrl_flags: &enum iwl_sta_key_flag * @igtk: IGTK key material @@ -449,7 +449,7 @@ struct iwl_mvm_mgmt_mcast_key_cmd_v1 { } __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ /** - * struct iwl_mvm_mgmt_mcast_key_cmd + * struct iwl_mvm_mgmt_mcast_key_cmd - IGTK command * ( MGMT_MCAST_KEY = 0x1f ) * @ctrl_flags: &enum iwl_sta_key_flag * @igtk: IGTK master key diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h index 00713a9918792..8d9a5058d5a54 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h @@ -26,7 +26,7 @@ struct mvm_statistics_div { } __packed; /* STATISTICS_SLOW_DIV_API_S_VER_2 */ /** - * struct mvm_statistics_rx_non_phy + * struct mvm_statistics_rx_non_phy - non-PHY RX statistics * @bogus_cts: CTS received when not expecting CTS * @bogus_ack: ACK received when not expecting ACK * @non_channel_beacons: beacons with our bss id but not on our serving channel @@ -456,7 +456,7 @@ struct 
iwl_system_statistics_cmd { } __packed; /* STATISTICS_FW_CMD_API_S_VER_1 */ /** - * enum iwl_fw_statistics_type + * enum iwl_fw_statistics_type - statistics type * * @FW_STATISTICS_OPERATIONAL: operational statistics * @FW_STATISTICS_PHY: phy statistics @@ -478,7 +478,7 @@ enum iwl_fw_statistics_type { #define IWL_STATISTICS_TYPE_MSK 0x7f /** - * struct iwl_statistics_ntfy_hdr + * struct iwl_statistics_ntfy_hdr - statistics notification header * * @type: struct type * @version: version of the struct @@ -491,7 +491,7 @@ struct iwl_statistics_ntfy_hdr { }; /* STATISTICS_NTFY_HDR_API_S_VER_1 */ /** - * struct iwl_stats_ntfy_per_link + * struct iwl_stats_ntfy_per_link - per-link statistics * * @beacon_filter_average_energy: Average energy [-dBm] of the 2 * antennas. @@ -514,7 +514,7 @@ struct iwl_stats_ntfy_per_link { } __packed; /* STATISTICS_NTFY_PER_LINK_API_S_VER_1 */ /** - * struct iwl_stats_ntfy_part1_per_link + * struct iwl_stats_ntfy_part1_per_link - part1 per link statistics * * @rx_time: rx time * @tx_time: tx time @@ -533,7 +533,7 @@ struct iwl_stats_ntfy_part1_per_link { } __packed; /* STATISTICS_FW_NTFY_OPERATIONAL_PART1_PER_LINK_API_S_VER_1 */ /** - * struct iwl_stats_ntfy_per_mac + * struct iwl_stats_ntfy_per_mac - per MAC statistics * * @beacon_filter_average_energy: Average energy [-dBm] of the 2 * antennas. @@ -556,7 +556,8 @@ struct iwl_stats_ntfy_per_mac { } __packed; /* STATISTICS_NTFY_PER_MAC_API_S_VER_1 */ #define IWL_STATS_MAX_BW_INDEX 5 -/** struct iwl_stats_ntfy_per_phy +/** + * struct iwl_stats_ntfy_per_phy - per PHY statistics * @channel_load: channel load * @channel_load_by_us: device contribution to MCLM * @channel_load_not_by_us: other devices' contribution to MCLM @@ -588,7 +589,7 @@ struct iwl_stats_ntfy_per_phy { #define IWL_STATS_UNKNOWN_CHANNEL_LOAD 0xffffffff /** - * struct iwl_stats_ntfy_per_sta + * struct iwl_stats_ntfy_per_sta - per STA statistics * * @average_energy: in fact it is minus the energy.. 
*/ @@ -600,7 +601,7 @@ struct iwl_stats_ntfy_per_sta { #define IWL_STATS_MAX_FW_LINKS (IWL_FW_MAX_LINK_ID + 1) /** - * struct iwl_system_statistics_notif_oper + * struct iwl_system_statistics_notif_oper - statistics notification * * @time_stamp: time when the notification is sent from firmware * @per_link: per link statistics, &struct iwl_stats_ntfy_per_link @@ -615,7 +616,7 @@ struct iwl_system_statistics_notif_oper { } __packed; /* STATISTICS_FW_NTFY_OPERATIONAL_API_S_VER_3 */ /** - * struct iwl_system_statistics_part1_notif_oper + * struct iwl_system_statistics_part1_notif_oper - part1 stats notification * * @time_stamp: time when the notification is sent from firmware * @per_link: per link statistics &struct iwl_stats_ntfy_part1_per_link @@ -628,7 +629,7 @@ struct iwl_system_statistics_part1_notif_oper { } __packed; /* STATISTICS_FW_NTFY_OPERATIONAL_PART1_API_S_VER_4 */ /** - * struct iwl_system_statistics_end_notif + * struct iwl_system_statistics_end_notif - statistics end notification * * @time_stamp: time when the notification is sent from firmware */ @@ -637,7 +638,7 @@ struct iwl_system_statistics_end_notif { } __packed; /* STATISTICS_FW_NTFY_END_API_S_VER_1 */ /** - * struct iwl_statistics_operational_ntfy + * struct iwl_statistics_operational_ntfy - operational stats notification * * @hdr: general statistics header * @flags: bitmap of possible notification structures @@ -662,7 +663,7 @@ struct iwl_statistics_operational_ntfy { } __packed; /* STATISTICS_OPERATIONAL_NTFY_API_S_VER_15 */ /** - * struct iwl_statistics_operational_ntfy_ver_14 + * struct iwl_statistics_operational_ntfy_ver_14 - operational stats notification * * @hdr: general statistics header * @flags: bitmap of possible notification structures @@ -707,7 +708,7 @@ struct iwl_statistics_operational_ntfy_ver_14 { } __packed; /* STATISTICS_OPERATIONAL_NTFY_API_S_VER_14 */ /** - * struct iwl_statistics_phy_ntfy + * struct iwl_statistics_phy_ntfy - PHY statistics notification * * @hdr: general 
statistics header * RX PHY related statistics @@ -808,7 +809,7 @@ struct iwl_statistics_phy_ntfy { } __packed; /* STATISTICS_PHY_NTFY_API_S_VER_1 */ /** - * struct iwl_statistics_mac_ntfy + * struct iwl_statistics_mac_ntfy - MAC statistics notification * * @hdr: general statistics header * @bcast_filter_passed_per_mac: bcast filter passed per mac @@ -827,7 +828,7 @@ struct iwl_statistics_mac_ntfy { } __packed; /* STATISTICS_MAC_NTFY_API_S_VER_1 */ /** - * struct iwl_statistics_rx_ntfy + * struct iwl_statistics_rx_ntfy - RX statistics notification * * @hdr: general statistics header * @rx_agg_mpdu_cnt: aggregation frame count (number of @@ -867,7 +868,7 @@ struct iwl_statistics_rx_ntfy { } __packed; /* STATISTICS_RX_NTFY_API_S_VER_1 */ /** - * struct iwl_statistics_tx_ntfy + * struct iwl_statistics_tx_ntfy - TX statistics notification * * @hdr: general statistics header * @cts_timeout: timeout when waiting for CTS @@ -976,7 +977,7 @@ struct iwl_statistics_tx_ntfy { } __packed; /* STATISTICS_TX_NTFY_API_S_VER_1 */ /** - * struct iwl_statistics_duration_ntfy + * struct iwl_statistics_duration_ntfy - burst/duration statistics * * @hdr: general statistics header * @cont_burst_chk_cnt: number of times continuation or @@ -995,7 +996,7 @@ struct iwl_statistics_duration_ntfy { } __packed; /* STATISTICS_DURATION_NTFY_API_S_VER_1 */ /** - * struct iwl_statistics_he_ntfy + * struct iwl_statistics_he_ntfy - HE statistics * * @hdr: general statistics header * received HE frames diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h index 26d2013905ed9..31d3336726b42 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h @@ -963,7 +963,7 @@ struct iwl_scd_txq_cfg_cmd { } __packed; /* SCD_QUEUE_CFG_CMD_API_S_VER_1 */ /** - * struct iwl_scd_txq_cfg_rsp + * struct iwl_scd_txq_cfg_rsp - scheduler TXQ configuration response * @token: taken from the command * @sta_id: station 
id from the command * @tid: tid from the command diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h index c2a73cc85eff4..525a82030daa4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h @@ -266,7 +266,7 @@ struct iwl_fw_ini_error_dump_data { } __packed; /** - * struct iwl_fw_ini_dump_entry + * struct iwl_fw_ini_dump_entry - dump entry descriptor * @list: list of dump entries * @size: size of the data * @data: entry data @@ -305,7 +305,7 @@ struct iwl_fw_ini_fifo_hdr { * @dram_base_addr: base address of dram monitor range * @page_num: page number of memory range * @fifo_hdr: fifo header of memory range - * @fw_pkt: FW packet header of memory range + * @fw_pkt_hdr: FW packet header of memory range * @data: the actual memory */ struct iwl_fw_ini_error_dump_range { diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index b7c1ab7a30061..b9e0b69c66803 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -222,7 +222,10 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t; * @IWL_UCODE_TLV_API_STA_TYPE: This ucode supports station type assignement. * @IWL_UCODE_TLV_API_NAN2_VER2: This ucode supports NAN API version 2 * @IWL_UCODE_TLV_API_ADAPTIVE_DWELL: support for adaptive dwell in scanning + * @IWL_UCODE_TLV_API_OCE: support for OCE + * @IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE: new beacon template * @IWL_UCODE_TLV_API_NEW_RX_STATS: should new RX STATISTICS API be used + * @IWL_UCODE_TLV_API_WOWLAN_KEY_MATERIAL: WoWLAN key material support * @IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY: Quota command includes a field * indicating low latency direction. * @IWL_UCODE_TLV_API_DEPRECATE_TTAK: RX status flag TTAK ok (bit 7) is @@ -245,6 +248,7 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t; * SCAN_OFFLOAD_PROFILES_QUERY_RSP_S. 
* @IWL_UCODE_TLV_API_MBSSID_HE: This ucode supports v2 of * STA_CONTEXT_DOT11AX_API_S + * @IWL_UCODE_TLV_API_WOWLAN_TCP_SYN_WAKE: WoWLAN TCP-SYN wake support * @IWL_UCODE_TLV_API_FTM_RTT_ACCURACY: version 7 of the range response API * is supported by FW, this indicates the RTT confidence value * @IWL_UCODE_TLV_API_SAR_TABLE_VER: This ucode supports different sar @@ -253,6 +257,7 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t; * SCAN_CONFIG_DB_CMD_API_S. * @IWL_UCODE_TLV_API_ADWELL_HB_DEF_N_AP: support for setting adaptive dwell * number of APs in the 5 GHz band + * @IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER: extended channel config in scan * @IWL_UCODE_TLV_API_BAND_IN_RX_DATA: FW reports band number in RX notification * @IWL_UCODE_TLV_API_NO_HOST_DISABLE_TX: Firmware offloaded the station disable tx * logic. @@ -352,16 +357,24 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_SOC_LATENCY_SUPPORT: the firmware supports setting * stabilization latency for SoCs. * @IWL_UCODE_TLV_CAPA_STA_PM_NOTIF: firmware will send STA PM notification + * @IWL_UCODE_TLV_CAPA_BINDING_CDB_SUPPORT: binding CDB support + * @IWL_UCODE_TLV_CAPA_CDB_SUPPORT: CDB support + * @IWL_UCODE_TLV_CAPA_D0I3_END_FIRST: D0I3 end command comes first * @IWL_UCODE_TLV_CAPA_TLC_OFFLOAD: firmware implements rate scaling algorithm * @IWL_UCODE_TLV_CAPA_DYNAMIC_QUOTA: firmware implements quota related * @IWL_UCODE_TLV_CAPA_COEX_SCHEMA_2: firmware implements Coex Schema 2 - * IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD: firmware supports CSA command + * @IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD: firmware supports CSA command * @IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS: firmware supports ultra high band * (6 GHz). 
* @IWL_UCODE_TLV_CAPA_CS_MODIFY: firmware supports modify action CSA command + * @IWL_UCODE_TLV_CAPA_SET_LTR_GEN2: LTR gen2 support + * @IWL_UCODE_TLV_CAPA_TAS_CFG: TAS configuration support + * @IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD: session protection command + * @IWL_UCODE_TLV_CAPA_SET_PPAG: PPAG support * @IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE: extended DTS measurement * @IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS: supports short PM timeouts * @IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT + * @IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT: MQ RX support * @IWL_UCODE_TLV_CAPA_CSA_AND_TBTT_OFFLOAD: the firmware supports CSA * countdown offloading. Beacon notifications are not sent to the host. * The fw also offloads TBTT alignment. @@ -383,23 +396,46 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * command size (command version 4) that supports toggling ACK TX * power reduction. * @IWL_UCODE_TLV_CAPA_D3_DEBUG: supports debug recording during D3 + * @IWL_UCODE_TLV_CAPA_LED_CMD_SUPPORT: LED command support * @IWL_UCODE_TLV_CAPA_MCC_UPDATE_11AX_SUPPORT: MCC response support 11ax * capability. 
* @IWL_UCODE_TLV_CAPA_CSI_REPORTING: firmware is capable of being configured * to report the CSI information with (certain) RX frames + * @IWL_UCODE_TLV_CAPA_DBG_SUSPEND_RESUME_CMD_SUPP: suspend/resume command + * @IWL_UCODE_TLV_CAPA_DBG_BUF_ALLOC_CMD_SUPP: support for DBGC + * buffer allocation command * @IWL_UCODE_TLV_CAPA_FTM_CALIBRATED: has FTM calibrated and thus supports both * initiator and responder * @IWL_UCODE_TLV_CAPA_BIOS_OVERRIDE_UNII4_US_CA: supports (de)activating UNII-4 * for US/CA/WW from BIOS + * @IWL_UCODE_TLV_CAPA_PSC_CHAN_SUPPORT: supports PSC channels + * @IWL_UCODE_TLV_CAPA_BIGTK_SUPPORT: BIGTK support * @IWL_UCODE_TLV_CAPA_PROTECTED_TWT: Supports protection of TWT action frames * @IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE: Supports the firmware handshake in * reset flow * @IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN: Support for passive scan on 6GHz PSC * channels even when these are not enabled. + * @IWL_UCODE_TLV_CAPA_HIDDEN_6GHZ_SCAN: hidden SSID 6 GHz scan support + * @IWL_UCODE_TLV_CAPA_BROADCAST_TWT: broadcast TWT support + * @IWL_UCODE_TLV_CAPA_COEX_HIGH_PRIO: support for BT-coex high + * priority for 802.1X/4-way-HS + * @IWL_UCODE_TLV_CAPA_BAID_ML_SUPPORT: multi-link BAID support + * @IWL_UCODE_TLV_CAPA_SYNCED_TIME: synced time command support + * @IWL_UCODE_TLV_CAPA_TIME_SYNC_BOTH_FTM_TM: time sync support + * @IWL_UCODE_TLV_CAPA_BIGTK_TX_SUPPORT: BIGTK TX support + * @IWL_UCODE_TLV_CAPA_MLD_API_SUPPORT: MLD API support + * @IWL_UCODE_TLV_CAPA_SCAN_DONT_TOGGLE_ANT: fixed antenna scan support + * @IWL_UCODE_TLV_CAPA_PPAG_CHINA_BIOS_SUPPORT: PPAG China BIOS support + * @IWL_UCODE_TLV_CAPA_OFFLOAD_BTM_SUPPORT: BTM protocol offload support + * @IWL_UCODE_TLV_CAPA_STA_EXP_MFP_SUPPORT: STA command MFP support + * @IWL_UCODE_TLV_CAPA_SNIFF_VALIDATE_SUPPORT: sniffer validate bits support + * @IWL_UCODE_TLV_CAPA_CHINA_22_REG_SUPPORT: China 2022 regulatory support * @IWL_UCODE_TLV_CAPA_DUMP_COMPLETE_SUPPORT: Support for indicating dump collection *
complete to FW. * @IWL_UCODE_TLV_CAPA_SPP_AMSDU_SUPPORT: Support SPP (signaling and payload * protected) A-MSDU. + * @IWL_UCODE_TLV_CAPA_DRAM_FRAG_SUPPORT: support for DBGC fragmented + * DRAM buffers * @IWL_UCODE_TLV_CAPA_SECURE_LTF_SUPPORT: Support secure LTF measurement. * @IWL_UCODE_TLV_CAPA_MONITOR_PASSIVE_CHANS: Support monitor mode on otherwise * passive channels @@ -407,6 +443,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * for CA from BIOS. * @IWL_UCODE_TLV_CAPA_UHB_CANADA_TAS_SUPPORT: supports %TAS_UHB_ALLOWED_CANADA * @IWL_UCODE_TLV_CAPA_EXT_FSEQ_IMAGE_SUPPORT: external FSEQ image support + * @IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT: FW reset handshake is needed + * during assert handling even if the dump isn't split * @IWL_UCODE_TLV_CAPA_FW_ACCEPTS_RAW_DSM_TABLE: Firmware has capability of * handling raw DSM table data. * @@ -487,12 +525,7 @@ enum iwl_ucode_tlv_capa { /* set 3 */ IWL_UCODE_TLV_CAPA_BIOS_OVERRIDE_UNII4_US_CA = (__force iwl_ucode_tlv_capa_t)96, - - /* - * @IWL_UCODE_TLV_CAPA_PSC_CHAN_SUPPORT: supports PSC channels - */ IWL_UCODE_TLV_CAPA_PSC_CHAN_SUPPORT = (__force iwl_ucode_tlv_capa_t)98, - IWL_UCODE_TLV_CAPA_BIGTK_SUPPORT = (__force iwl_ucode_tlv_capa_t)100, IWL_UCODE_TLV_CAPA_SPP_AMSDU_SUPPORT = (__force iwl_ucode_tlv_capa_t)103, IWL_UCODE_TLV_CAPA_DRAM_FRAG_SUPPORT = (__force iwl_ucode_tlv_capa_t)104, @@ -514,11 +547,8 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_EXT_FSEQ_IMAGE_SUPPORT = (__force iwl_ucode_tlv_capa_t)125, /* set 4 */ - /** - * @IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT: FW reset handshake is needed - * during assert handling even if the dump isn't split - */ - IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT = (__force iwl_ucode_tlv_capa_t)(4 * 32 + 0), + + IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT = (__force iwl_ucode_tlv_capa_t)(4 * 32 + 0), IWL_UCODE_TLV_CAPA_FW_ACCEPTS_RAW_DSM_TABLE = (__force iwl_ucode_tlv_capa_t)(4 * 32 + 1), NUM_IWL_UCODE_TLV_CAPA /* @@ -852,6 +882,8 @@ struct iwl_fw_dbg_trigger_low_rssi { * 
@start_assoc_denied: number of denied association to start recording * @start_assoc_timeout: number of association timeout to start recording * @start_connection_loss: number of connection loss to start recording + * @reserved: reserved + * @reserved2: reserved */ struct iwl_fw_dbg_trigger_mlme { u8 stop_auth_denied; @@ -885,6 +917,7 @@ struct iwl_fw_dbg_trigger_mlme { * @p2p_device: timeout for the queues of a P2P device in ms * @ibss: timeout for the queues of an IBSS in ms * @tdls: timeout for the queues of a TDLS station in ms + * @reserved: reserved */ struct iwl_fw_dbg_trigger_txq_timer { __le32 command_queue; @@ -900,7 +933,7 @@ struct iwl_fw_dbg_trigger_txq_timer { /** * struct iwl_fw_dbg_trigger_time_event - configures a time event trigger - * time_Events: a list of tuples <id, action_bitmap, status_bitmap>. The driver will issue a + * @time_events: a list of tuples <id, action_bitmap, status_bitmap>. The driver will issue a * trigger each time a time event notification that relates to time event * id with one of the actions in the bitmap is received and * BIT(notif->status) is set in status_bitmap. @@ -916,19 +949,19 @@ struct iwl_fw_dbg_trigger_time_event { /** * struct iwl_fw_dbg_trigger_ba - configures BlockAck related trigger - * rx_ba_start: tid bitmap to configure on what tid the trigger should occur + * @rx_ba_start: tid bitmap to configure on what tid the trigger should occur * when an Rx BlockAck session is started. - * rx_ba_stop: tid bitmap to configure on what tid the trigger should occur + * @rx_ba_stop: tid bitmap to configure on what tid the trigger should occur * when an Rx BlockAck session is stopped. - * tx_ba_start: tid bitmap to configure on what tid the trigger should occur + * @tx_ba_start: tid bitmap to configure on what tid the trigger should occur * when a Tx BlockAck session is started. - * tx_ba_stop: tid bitmap to configure on what tid the trigger should occur + * @tx_ba_stop: tid bitmap to configure on what tid the trigger should occur * when a Tx BlockAck session is stopped.
- * rx_bar: tid bitmap to configure on what tid the trigger should occur + * @rx_bar: tid bitmap to configure on what tid the trigger should occur * when a BAR is received (for a Tx BlockAck session). - * tx_bar: tid bitmap to configure on what tid the trigger should occur + * @tx_bar: tid bitmap to configure on what tid the trigger should occur * when a BAR is send (for an Rx BlocAck session). - * frame_timeout: tid bitmap to configure on what tid the trigger should occur + * @frame_timeout: tid bitmap to configure on what tid the trigger should occur * when a frame times out in the reordering buffer. */ struct iwl_fw_dbg_trigger_ba { @@ -946,6 +979,7 @@ struct iwl_fw_dbg_trigger_ba { * @action_bitmap: the TDLS action to trigger the collection upon * @peer_mode: trigger on specific peer or all * @peer: the TDLS peer to trigger the collection on + * @reserved: reserved */ struct iwl_fw_dbg_trigger_tdls { u8 action_bitmap; @@ -958,6 +992,7 @@ struct iwl_fw_dbg_trigger_tdls { * struct iwl_fw_dbg_trigger_tx_status - configures trigger for tx response * status. * @statuses: the list of statuses to trigger the collection on + * @reserved: reserved */ struct iwl_fw_dbg_trigger_tx_status { struct tx_status { @@ -971,6 +1006,7 @@ struct iwl_fw_dbg_trigger_tx_status { * struct iwl_fw_dbg_conf_tlv - a TLV that describes a debug configuration. * @id: conf id * @usniffer: should the uSniffer image be used + * @reserved: reserved * @num_of_hcmds: how many HCMDs to send are present here * @hcmd: a variable length host command to be sent to apply the configuration. * If there is more than one HCMD to send, they will appear one after the diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h index 5256f20623e90..045a3e0094291 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/img.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h @@ -14,14 +14,13 @@ #include "error-dump.h" /** - * enum iwl_ucode_type - * - * The type of ucode. 
+ * enum iwl_ucode_type - type of ucode * * @IWL_UCODE_REGULAR: Normal runtime ucode * @IWL_UCODE_INIT: Initial ucode * @IWL_UCODE_WOWLAN: Wake on Wireless enabled ucode * @IWL_UCODE_REGULAR_USNIFFER: Normal runtime ucode when using usniffer image + * @IWL_UCODE_TYPE_MAX: (internal value) */ enum iwl_ucode_type { IWL_UCODE_REGULAR, @@ -122,7 +121,7 @@ struct fw_img { #define FW_ADDR_CACHE_CONTROL 0xC0000000UL /** - * struct iwl_fw_paging + * struct iwl_fw_paging - FW paging descriptor * @fw_paging_phys: page phy pointer * @fw_paging_block: pointer to the allocated block * @fw_paging_size: page size @@ -197,6 +196,11 @@ struct iwl_dump_exclude { * @dump_excl_wowlan: image dump exclusion areas for WoWLAN image * @pnvm_data: PNVM data embedded in the .ucode file, if any * @pnvm_size: size of the embedded PNVM data + * @dbg: debug data, see &struct iwl_fw_dbg + * @default_calib: default calibration data + * @phy_config: PHY configuration flags + * @valid_rx_ant: valid RX antenna bitmap + * @valid_tx_ant: valid TX antenna bitmap */ struct iwl_fw { u32 ucode_ver; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index 806f9bcdf4f50..57570ff15622e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -45,6 +45,8 @@ struct iwl_fwrt_shared_mem_cfg { * struct iwl_fwrt_dump_data - dump data * @trig: trigger the worker was scheduled upon * @fw_pkt: packet received from FW + * @desc: dump descriptor + * @monitor_only: only dump for monitor * * Note that the decision which part of the union is used * is based on iwl_trans_dbg_ini_valid(): the 'trig' part @@ -68,6 +70,7 @@ struct iwl_fwrt_dump_data { * struct iwl_fwrt_wk_data - dump worker data struct * @idx: index of the worker * @wk: worker + * @dump_data: dump data */ struct iwl_fwrt_wk_data { u8 idx; @@ -91,8 +94,8 @@ struct iwl_txf_iter_data { /** * struct iwl_fw_runtime - runtime data for firmware + * 
@trans: transport pointer * @fw: firmware image - * @cfg: NIC configuration * @dev: device pointer * @ops: user ops * @ops_ctx: user ops context @@ -117,6 +120,23 @@ struct iwl_txf_iter_data { * zero (default initialization) means it hasn't been read yet, * and BIT(0) is set when it has since function 0 also has this * bitmap and is always supported + * @geo_enabled: WGDS table is present + * @geo_num_profiles: number of geo profiles + * @geo_rev: geo profiles table revision + * @ppag_chains: PPAG table data + * @ppag_flags: PPAG flags + * @reduced_power_flags: reduced power flags + * @sanitize_ctx: context for dump sanitizer + * @sanitize_ops: dump sanitizer ops + * @sar_chain_a_profile: SAR chain A profile + * @sar_chain_b_profile: SAR chain B profile + * @sgom_enabled: SGOM enabled + * @sgom_table: SGOM table + * @timestamp: timestamp marker data + * @timestamp.wk: timestamp marking worker + * @timestamp.seq: timestamp marking sequence + * @timestamp.delay: timestamp marking worker delay + * @tpc_enabled: TPC enabled */ struct iwl_fw_runtime { struct iwl_trans *trans; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index cfd1629f00163..0b34c9f90b3ff 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -385,7 +385,7 @@ struct iwl_mac_cfg { #define IWL_NUM_RBDS_EHT (512 * 8) /** - * struct iwl_rf_cfg + * struct iwl_rf_cfg - RF/CRF configuration data * @fw_name_pre: Firmware filename prefix. The api version and extension * (.ucode) will be added to filename before loading from disk. The * filename is constructed as -.ucode. 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h index 7ed6329fd8ca1..fe4e46a0edbda 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2023, 2025 Intel Corporation */ #ifndef __iwl_dbg_tlv_h__ #define __iwl_dbg_tlv_h__ @@ -32,7 +32,7 @@ union iwl_dbg_tlv_tp_data { }; /** - * struct iwl_dbg_tlv_time_point_data + * struct iwl_dbg_tlv_time_point_data - debug time point data * @trig_list: list of triggers * @active_trig_list: list of active triggers * @hcmd_list: list of host commands diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h index 595300a14639d..a0b67e8aba8da 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h @@ -62,7 +62,8 @@ struct iwl_rf_cfg; * starts the driver: fetches the firmware. This should be called by bus * specific system flows implementations. For example, the bus specific probe * function should do bus related operations only, and then call to this - * function. It returns the driver object or %NULL if an error occurred. + * function. + * Return: the driver object or %NULL if an error occurred. 
*/ struct iwl_drv *iwl_drv_start(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h index 21eabfc3ffc84..0476df7b7f179 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2018-2022, 2024 Intel Corporation + * Copyright (C) 2005-2014, 2018-2022, 2024-2025 Intel Corporation */ #ifndef __iwl_modparams_h__ #define __iwl_modparams_h__ @@ -42,7 +42,7 @@ enum iwl_uapsd_disable { }; /** - * struct iwl_mod_params + * struct iwl_mod_params - module parameters for iwlwifi * * Holds the module parameters * diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index cbc92abf9f87a..12f28bb0e859d 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -115,11 +115,12 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_rf_cfg *cfg, * iwl_parse_nvm_mcc_info - parse MCC (mobile country code) info coming from FW * * This function parses the regulatory channel data received as a - * MCC_UPDATE_CMD command. It returns a newly allocation regulatory domain, - * to be fed into the regulatory core. In case the geo_info is set handle - * accordingly. An ERR_PTR is returned on error. - * If not given to the regulatory core, the user is responsible for freeing - * the regdomain returned here with kfree. + * MCC_UPDATE_CMD command. + * + * Return: a newly allocated regulatory domain, to be given to the regulatory + * core. In case the geo_info is set handle accordingly. An ERR_PTR is + * returned on error. If not given to the regulatory core, the user is + * responsible for freeing the regdomain returned here with kfree().
* * @trans: the transport * @num_of_ch: the number of channels @@ -140,6 +141,8 @@ iwl_parse_nvm_mcc_info(struct iwl_trans *trans, * This struct holds an NVM section read from the NIC using NVM_ACCESS_CMD, * and saved for later use by the driver. Not all NVM sections are saved * this way, only the needed ones. + * @length: length of the section + * @data: section data */ struct iwl_nvm_section { u16 length; @@ -148,6 +151,10 @@ struct iwl_nvm_section { /** * iwl_read_external_nvm - Reads external NVM from a file into nvm_sections + * @trans: the transport + * @nvm_file_name: the filename to request + * @nvm_sections: sections data to fill + * Return: 0 on success or an error code */ int iwl_read_external_nvm(struct iwl_trans *trans, const char *nvm_file_name, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h index a146d0e399f22..df6341dfc4a12 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h @@ -185,6 +185,7 @@ void iwl_opmode_deregister(const char *name); /** * struct iwl_op_mode - operational mode * @ops: pointer to its own ops + * @op_mode_specific: per-opmode data * * This holds an implementation of the mac80211 / fw API. 
*/ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index a0cc5d7745e8f..a552669db6e2a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -121,7 +121,7 @@ enum CMD_MODE { #define DEF_CMD_PAYLOAD_SIZE 320 /** - * struct iwl_device_cmd + * struct iwl_device_cmd - device command structure * * For allocation of the command and tx queues, this establishes the overall * size of the largest command we send to uCode, except for commands that @@ -516,7 +516,7 @@ enum iwl_trans_state { */ /** - * enum iwl_ini_cfg_state + * enum iwl_ini_cfg_state - debug config state * @IWL_INI_CFG_STATE_NOT_LOADED: no debug cfg was given * @IWL_INI_CFG_STATE_LOADED: debug cfg was found and loaded * @IWL_INI_CFG_STATE_CORRUPTED: debug cfg was found and some of the TLVs @@ -532,7 +532,7 @@ enum iwl_ini_cfg_state { #define IWL_TRANS_NMI_TIMEOUT (HZ / 4) /** - * struct iwl_dram_data + * struct iwl_dram_data - DRAM data descriptor * @physical: page phy pointer * @block: pointer to the allocated block/page * @size: size of the block/page From d852e72d9425ef1401442c0226cd7b936c02a0ca Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Sep 2025 11:34:30 +0300 Subject: [PATCH 313/867] wifi: iwlwifi: mvm: cleanup unsupported phy command versions The iwlmvm op mode is used for pre EHT HWs. Those HWs don't have wider OFDMA, so phy command versions 5+ (that added support for wider OFDMA) are not supported. Remove support for them. This means that we also don't need to set the IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW, as we don't care about the ap chandef anyway.
Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.e7e26fe71132.I8ec95ff86521661118782ecee1be20ef6e8e48e1@changeid --- .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 3 --- .../net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 24 +++++-------------- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index b1dca76b7141e..380b6f8a53fde 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -102,9 +102,6 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw, mvm->csme_vif = vif; } - if (vif->p2p || iwl_fw_lookup_cmd_ver(mvm->fw, PHY_CONTEXT_CMD, 1) < 5) - vif->driver_flags |= IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW; - return 0; out_free_bf: diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c index 5e7e2926be0ce..4f4111055ddd2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH */ @@ -202,17 +202,13 @@ int iwl_mvm_phy_send_rlc(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, const struct cfg80211_chan_def *chandef, - const struct cfg80211_chan_def *ap, u8 chains_static, u8 chains_dynamic, u32 action) { int ret; int ver = iwl_fw_lookup_cmd_ver(mvm->fw, PHY_CONTEXT_CMD, 1); - if (ver < 5 || !ap || !ap->chan) - ap = NULL; - - if (ver >= 3 && ver <= 6) { + if (ver >= 3 && ver <= 4) { struct iwl_phy_context_cmd cmd = {}; /* Set 
the command header fields */ @@ -223,14 +219,6 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm, chains_static, chains_dynamic); - if (ap) { - cmd.sbb_bandwidth = iwl_mvm_get_channel_width(ap); - cmd.sbb_ctrl_channel_loc = iwl_mvm_get_ctrl_pos(ap); - } - - if (ver == 6) - cmd.puncture_mask = cpu_to_le16(chandef->punctured); - ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD, 0, sizeof(cmd), &cmd); } else if (ver < 3) { @@ -284,7 +272,7 @@ int iwl_mvm_phy_ctxt_add(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, ctxt->width = chandef->width; ctxt->center_freq1 = chandef->center_freq1; - ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, ap, + ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, chains_static, chains_dynamic, FW_CTXT_ACTION_ADD); @@ -342,7 +330,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, int ret; /* ... remove it here ...*/ - ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, NULL, + ret = iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, chains_static, chains_dynamic, FW_CTXT_ACTION_REMOVE); if (ret) @@ -356,7 +344,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, ctxt->width = chandef->width; ctxt->center_freq1 = chandef->center_freq1; - return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, ap, + return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef, chains_static, chains_dynamic, action); } @@ -376,7 +364,7 @@ void iwl_mvm_phy_ctxt_unref(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt) cfg80211_chandef_create(&chandef, ctxt->channel, NL80211_CHAN_NO_HT); - iwl_mvm_phy_ctxt_apply(mvm, ctxt, &chandef, NULL, 1, 1, + iwl_mvm_phy_ctxt_apply(mvm, ctxt, &chandef, 1, 1, FW_CTXT_ACTION_REMOVE); } From 355431679a910eedc3d1a01cfffaec590a02033b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 15 Sep 2025 11:34:31 +0300 Subject: [PATCH 314/867] wifi: iwlwifi: mld: support get/set_antenna This allows to set the antennas from user space. 
Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.5a45baf9513c.I5912e6b6d9a9ae6530d0ac45e9517d07f98b8d05@changeid --- .../net/wireless/intel/iwlwifi/mld/mac80211.c | 34 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mld/mld.h | 11 ++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 5725104a53bf0..98d47fed84215 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -23,6 +23,7 @@ #include "roc.h" #include "mlo.h" #include "stats.h" +#include "iwl-nvm-parse.h" #include "ftm-initiator.h" #include "low_latency.h" #include "fw/api/scan.h" @@ -2591,11 +2592,44 @@ iwl_mld_can_neg_ttlm(struct ieee80211_hw *hw, struct ieee80211_vif *vif, return NEG_TTLM_RES_ACCEPT; } +static int iwl_mld_get_antenna(struct ieee80211_hw *hw, int radio_idx, + u32 *tx_ant, u32 *rx_ant) +{ + struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); + + *tx_ant = iwl_mld_get_valid_tx_ant(mld); + *rx_ant = iwl_mld_get_valid_rx_ant(mld); + + return 0; +} + +static int iwl_mld_set_antenna(struct ieee80211_hw *hw, int radio_idx, + u32 tx_ant, u32 rx_ant) +{ + struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); + + if (WARN_ON(!mld->nvm_data)) + return -EBUSY; + + /* mac80211 ensures the device is not started, + * so the firmware cannot be running + */ + + mld->set_tx_ant = tx_ant; + mld->set_rx_ant = rx_ant; + + iwl_reinit_cab(mld->trans, mld->nvm_data, tx_ant, rx_ant, mld->fw); + + return 0; +} + const struct ieee80211_ops iwl_mld_hw_ops = { .tx = iwl_mld_mac80211_tx, .start = iwl_mld_mac80211_start, .stop = iwl_mld_mac80211_stop, .config = iwl_mld_mac80211_config, + .get_antenna = iwl_mld_get_antenna, + .set_antenna = iwl_mld_set_antenna, .add_interface = iwl_mld_mac80211_add_interface, .remove_interface = iwl_mld_mac80211_remove_interface, .conf_tx = 
iwl_mld_mac80211_conf_tx, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index 94dc9da6360dc..b1d44fdaa61b0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -180,6 +180,8 @@ * @mcast_filter_cmd: pointer to the multicast filter command. * @mgmt_tx_ant: stores the last TX antenna index; used for setting * TX rate_n_flags for non-STA mgmt frames (toggles on every TX failure). + * @set_tx_ant: stores the last TX antenna bitmask set by user space (if any) + * @set_rx_ant: stores the last RX antenna bitmask set by user space (if any) * @fw_rates_ver_3: FW rates are in version 3 * @low_latency: low-latency manager. * @tzone: thermal zone device's data @@ -279,6 +281,9 @@ struct iwl_mld { u8 mgmt_tx_ant; + u8 set_tx_ant; + u8 set_rx_ant; + bool fw_rates_ver_3; struct iwl_mld_low_latency low_latency; @@ -374,6 +379,9 @@ static inline u8 iwl_mld_get_valid_tx_ant(const struct iwl_mld *mld) if (mld->nvm_data && mld->nvm_data->valid_tx_ant) tx_ant &= mld->nvm_data->valid_tx_ant; + if (mld->set_tx_ant) + tx_ant &= mld->set_tx_ant; + return tx_ant; } @@ -384,6 +392,9 @@ static inline u8 iwl_mld_get_valid_rx_ant(const struct iwl_mld *mld) if (mld->nvm_data && mld->nvm_data->valid_rx_ant) rx_ant &= mld->nvm_data->valid_rx_ant; + if (mld->set_rx_ant) + rx_ant &= mld->set_rx_ant; + return rx_ant; } From d676e01357682c966b5e135aa3359fd90be85be9 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Sep 2025 11:34:32 +0300 Subject: [PATCH 315/867] wifi: iwlwifi: mld: set wiphy::iftype_ext_capab dynamically Instead of having a static const array for each possible combination of features, build the extended capabilities dynamically. With this we will also stop setting EHT capabilities when it might actually be disabled. 
Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250915113137.b3c03b56d5a3.I38eaf8ebaf3256e78b4643bef7e3a54aeb4989df@changeid --- .../wireless/intel/iwlwifi/mld/constants.h | 2 + .../net/wireless/intel/iwlwifi/mld/mac80211.c | 63 ++++++++++--------- drivers/net/wireless/intel/iwlwifi/mld/mld.h | 4 ++ 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/constants.h b/drivers/net/wireless/intel/iwlwifi/mld/constants.h index 49accf96f44b6..5d23a618ae3ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/constants.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/constants.h @@ -75,5 +75,7 @@ #define IWL_MLD_FTM_RESP_LMR_FEEDBACK_SUPPORT true #define IWL_MLD_FTM_NON_TB_MIN_TIME_BETWEEN_MSR 7 #define IWL_MLD_FTM_NON_TB_MAX_TIME_BETWEEN_MSR 1000 +#define IWL_MLD_STA_EXT_CAPA_SIZE 9 +#define IWL_MLD_EXT_CAPA_NUM_IFTYPES 1 #endif /* __iwl_mld_constants_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 98d47fed84215..5323c73ac8277 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -76,13 +76,12 @@ iwl_mld_iface_combinations[] = { }, }; -static const u8 if_types_ext_capa_sta[] = { - [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, - [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT, - [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF | - WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB, - [8] = WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB, - [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT, +static const u8 ext_capa_base[IWL_MLD_STA_EXT_CAPA_SIZE] = { + [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, + [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT, + [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF | + WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB, + [8] = WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB, }; #define IWL_MLD_EMLSR_CAPA (IEEE80211_EML_CAP_EMLSR_SUPP | \ @@ -95,18 +94,6 @@ static const u8 if_types_ext_capa_sta[] = { 
IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME) | \ IEEE80211_MLD_CAP_OP_LINK_RECONF_SUPPORT) -static const struct wiphy_iftype_ext_capab iftypes_ext_capa[] = { - { - .iftype = NL80211_IFTYPE_STATION, - .extended_capabilities = if_types_ext_capa_sta, - .extended_capabilities_mask = if_types_ext_capa_sta, - .extended_capabilities_len = sizeof(if_types_ext_capa_sta), - /* relevant only if EHT is supported */ - .eml_capabilities = IWL_MLD_EMLSR_CAPA, - .mld_capa_and_ops = IWL_MLD_CAPA_OPS, - }, -}; - static void iwl_mld_hw_set_addresses(struct iwl_mld *mld) { struct wiphy *wiphy = mld->wiphy; @@ -336,21 +323,37 @@ static void iwl_mac_hw_set_wiphy(struct iwl_mld *mld) if (fw_has_capa(ucode_capa, IWL_UCODE_TLV_CAPA_PROTECTED_TWT)) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_PROTECTED_TWT); - wiphy->iftype_ext_capab = NULL; - wiphy->num_iftype_ext_capab = 0; - - if (!iwlwifi_mod_params.disable_11ax) { - wiphy->iftype_ext_capab = iftypes_ext_capa; - wiphy->num_iftype_ext_capab = ARRAY_SIZE(iftypes_ext_capa); - - ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); - ieee80211_hw_set(hw, SUPPORTS_ONLY_HE_MULTI_BSSID); - } - if (iwlmld_mod_params.power_scheme != IWL_POWER_SCHEME_CAM) wiphy->flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; else wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; + + /* We are done for non-HE */ + if (iwlwifi_mod_params.disable_11ax) + return; + + ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); + ieee80211_hw_set(hw, SUPPORTS_ONLY_HE_MULTI_BSSID); + + wiphy->iftype_ext_capab = mld->ext_capab; + wiphy->num_iftype_ext_capab = ARRAY_SIZE(mld->ext_capab); + + BUILD_BUG_ON(sizeof(mld->sta_ext_capab) < sizeof(ext_capa_base)); + + memcpy(mld->sta_ext_capab, ext_capa_base, sizeof(ext_capa_base)); + + mld->ext_capab[0].iftype = NL80211_IFTYPE_STATION; + mld->ext_capab[0].extended_capabilities = mld->sta_ext_capab; + mld->ext_capab[0].extended_capabilities_mask = mld->sta_ext_capab; + mld->ext_capab[0].extended_capabilities_len = sizeof(mld->sta_ext_capab); + + if 
(!mld->nvm_data->sku_cap_11be_enable || + iwlwifi_mod_params.disable_11be) + return; + + mld->ext_capab[0].eml_capabilities = IWL_MLD_EMLSR_CAPA; + mld->ext_capab[0].mld_capa_and_ops = IWL_MLD_CAPA_OPS; + } static void iwl_mac_hw_set_misc(struct iwl_mld *mld) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index b1d44fdaa61b0..ceda12c1672d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -134,6 +134,8 @@ * @fw: a pointer to the fw object * @hw: pointer to the hw object. * @wiphy: a pointer to the wiphy struct, for easier access to it. + * @ext_capab: extended capabilities that will be set to wiphy on registration. + * @sta_ext_capab: extended capabilities for the station interface. * @nvm_data: pointer to the nvm_data that includes all our capabilities * @fwrt: fw runtime data * @debugfs_dir: debugfs directory @@ -227,6 +229,8 @@ struct iwl_mld { const struct iwl_fw *fw; struct ieee80211_hw *hw; struct wiphy *wiphy; + struct wiphy_iftype_ext_capab ext_capab[IWL_MLD_EXT_CAPA_NUM_IFTYPES]; + u8 sta_ext_capab[IWL_MLD_STA_EXT_CAPA_SIZE]; struct iwl_nvm_data *nvm_data; struct iwl_fw_runtime fwrt; struct dentry *debugfs_dir; From 92e87cee465cb5c293aa0a88c4cb5fe5e618583c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:02 +0300 Subject: [PATCH 316/867] wifi: iwlwifi: mld: update to new sniffer API This will break current sniffer functionality for firmware versions that don't have the new API, but supporting both would be very complex. Convert the code to use only the new sniffer notification. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.85b75a084a2f.I4a18b2043703c1f9a8f55c108dcaaeca7891e19c@changeid --- .../wireless/intel/iwlwifi/fw/api/datapath.h | 5 + .../net/wireless/intel/iwlwifi/fw/api/rx.h | 286 +++ .../net/wireless/intel/iwlwifi/mld/mac80211.c | 6 +- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1 + drivers/net/wireless/intel/iwlwifi/mld/mld.h | 10 +- .../net/wireless/intel/iwlwifi/mld/notif.c | 4 +- drivers/net/wireless/intel/iwlwifi/mld/rx.c | 1649 +++++++++-------- drivers/net/wireless/intel/iwlwifi/mld/rx.h | 5 +- 8 files changed, 1171 insertions(+), 795 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h index b1c6ee8ae2dfd..6a6e11a57dbf3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h @@ -123,6 +123,11 @@ enum iwl_data_path_subcmd_ids { */ BEACON_FILTER_IN_NOTIF = 0xF8, + /** + * @PHY_AIR_SNIFFER_NOTIF: &struct iwl_rx_phy_air_sniffer_ntfy + */ + PHY_AIR_SNIFFER_NOTIF = 0xF9, + /** * @STA_PM_NOTIF: &struct iwl_mvm_pm_state_notification */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index d751789998ac8..3ed7e0807b90b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -262,6 +262,7 @@ enum iwl_rx_mpdu_reorder_data { }; enum iwl_rx_mpdu_phy_info { + IWL_RX_MPDU_PHY_EOF_INDICATION = BIT(0), IWL_RX_MPDU_PHY_AMPDU = BIT(5), IWL_RX_MPDU_PHY_AMPDU_TOGGLE = BIT(6), IWL_RX_MPDU_PHY_SHORT_PREAMBLE = BIT(7), @@ -1041,4 +1042,289 @@ struct iwl_beacon_filter_notif { __le32 link_id; } __packed; /* BEACON_FILTER_IN_NTFY_API_S_VER_2 */ +union iwl_legacy_sig { +#define OFDM_RX_LEGACY_LENGTH 0x00000fff +#define OFDM_RX_RATE 0x0000f000 + __le32 ofdm; +#define CCK_CRFR_SHORT_PREAMBLE 0x00000040 + __le32 cck; +}; + 
+struct iwl_ht_sigs { +#define OFDM_RX_FRAME_HT_MCS 0x0000007f +#define OFDM_RX_FRAME_HT_BANDWIDTH 0x00000080 +#define OFDM_RX_FRAME_HT_LENGTH 0x03ffff00 + __le32 a1; + __le32 a2; +}; + +struct iwl_vht_sigs { +#define OFDM_RX_FRAME_VHT_NUM_OF_DATA_SYM 0x000007ff +#define OFDM_RX_FRAME_VHT_NUM_OF_DATA_SYM_VALID 0x80000000 + __le32 a0; + __le32 a1, a2; +}; + +struct iwl_he_sigs { +#define OFDM_RX_FRAME_HE_BEAM_CHANGE 0x00000001 +#define OFDM_RX_FRAME_HE_UL_FLAG 0x00000002 +#define OFDM_RX_FRAME_HE_MCS 0x0000003c +#define OFDM_RX_FRAME_HE_DCM 0x00000040 +#define OFDM_RX_FRAME_HE_BSS_COLOR 0x00001f80 +#define OFDM_RX_FRAME_HE_SPATIAL_REUSE 0x0001e000 +#define OFDM_RX_FRAME_HE_BANDWIDTH 0x00060000 +#define OFDM_RX_FRAME_HE_SU_EXT_BW10 0x00080000 +#define OFDM_RX_FRAME_HE_GI_LTF_TYPE 0x00700000 +#define OFDM_RX_FRAME_HE_NSTS 0x03800000 +#define OFDM_RX_FRAME_HE_PRMBL_PUNC_TYPE 0x0c000000 + __le32 a1; +#define OFDM_RX_FRAME_HE_TXOP_DURATION 0x0000007f +#define OFDM_RX_FRAME_HE_CODING 0x00000080 +#define OFDM_RX_FRAME_HE_CODING_EXTRA_SYM 0x00000100 +#define OFDM_RX_FRAME_HE_STBC 0x00000200 +#define OFDM_RX_FRAME_HE_BF 0x00000400 +#define OFDM_RX_FRAME_HE_PRE_FEC_PAD_FACTOR 0x00001800 +#define OFDM_RX_FRAME_HE_PE_DISAMBIG 0x00002000 +#define OFDM_RX_FRAME_HE_DOPPLER 0x00004000 +#define OFDM_RX_FRAME_HE_TYPE 0x00038000 +#define OFDM_RX_FRAME_HE_MU_NUM_OF_SIGB_SYM_OR_USER_NUM 0x003c0000 +#define OFDM_RX_FRAME_HE_MU_SIGB_COMP 0x00400000 +#define OFDM_RX_FRAME_HE_MU_NUM_OF_LTF_SYM 0x03800000 + __le32 a2; +#define OFDM_RX_FRAME_HE_NUM_OF_DATA_SYM 0x000007ff +#define OFDM_RX_FRAME_HE_PE_DURATION 0x00003800 +#define OFDM_RX_FRAME_HE_NUM_OF_DATA_SYM_VALID 0x80000000 + __le32 a3; +#define OFDM_RX_FRAME_HE_SIGB_STA_ID_FOUND 0x00000001 +#define OFDM_RX_FRAME_HE_SIGB_STA_ID_INDX 0x0000000e +#define OFDM_RX_FRAME_HE_SIGB_NSTS 0x00000070 +#define OFDM_RX_FRAME_HE_SIGB_BF 0x00000080 +#define OFDM_RX_FRAME_HE_SIGB_MCS 0x00000f00 +#define OFDM_RX_FRAME_HE_SIGB_DCM 0x00001000 +#define 
OFDM_RX_FRAME_HE_SIGB_CODING 0x00002000 +#define OFDM_RX_FRAME_HE_SIGB_SPATIAL_CONFIG 0x0003c000 +#define OFDM_RX_FRAME_HE_SIGB_STA_RU 0x03fc0000 +#define OFDM_RX_FRAME_HE_SIGB_NUM_OF_SYM 0x3c000000 +#define OFDM_RX_FRAME_HE_SIGB_CRC_OK 0x40000000 + __le32 b; +/* index 0 */ +#define OFDM_RX_FRAME_HE_RU_ALLOC_0_A1 0x000000ff +#define OFDM_RX_FRAME_HE_RU_ALLOC_0_A2 0x0000ff00 +#define OFDM_RX_FRAME_HE_RU_ALLOC_0_B1 0x00ff0000 +#define OFDM_RX_FRAME_HE_RU_ALLOC_0_B2 0xff000000 +/* index 1 */ +#define OFDM_RX_FRAME_HE_RU_ALLOC_1_C1 0x000000ff +#define OFDM_RX_FRAME_HE_RU_ALLOC_1_C2 0x0000ff00 +#define OFDM_RX_FRAME_HE_RU_ALLOC_1_D1 0x00ff0000 +#define OFDM_RX_FRAME_HE_RU_ALLOC_1_D2 0xff000000 +/* index 2 */ +#define OFDM_RX_FRAME_HE_CENTER_RU_CC1 0x00000001 +#define OFDM_RX_FRAME_HE_CENTER_RU_CC2 0x00000002 +#define OFDM_RX_FRAME_HE_COMMON_CC1_CRC_OK 0x00000004 +#define OFDM_RX_FRAME_HE_COMMON_CC2_CRC_OK 0x00000008 + __le32 cmn[3]; +}; + +struct iwl_he_tb_sigs { +#define OFDM_RX_HE_TRIG_FORMAT 0x00000001 +#define OFDM_RX_HE_TRIG_BSS_COLOR 0x0000007e +#define OFDM_RX_HE_TRIG_SPATIAL_REUSE_1 0x00000780 +#define OFDM_RX_HE_TRIG_SPATIAL_REUSE_2 0x00007800 +#define OFDM_RX_HE_TRIG_SPATIAL_REUSE_3 0x00078000 +#define OFDM_RX_HE_TRIG_SPATIAL_REUSE_4 0x00780000 +#define OFDM_RX_HE_TRIG_BANDWIDTH 0x03000000 + __le32 a1; +#define OFDM_RX_HE_TRIG_TXOP_DURATION 0x0000007f +#define OFDM_RX_HE_TRIG_SIG2_RESERVED 0x0000ff80 +#define OFDM_RX_HE_TRIG_FORMAT_ERR 0x08000000 +#define OFDM_RX_HE_TRIG_BW_ERR 0x10000000 +#define OFDM_RX_HE_TRIG_LEGACY_LENGTH_ERR 0x20000000 +#define OFDM_RX_HE_TRIG_CRC_OK 0x40000000 + __le32 a2; +#define OFDM_UCODE_TRIG_BASE_RX_LGCY_LENGTH 0x00000fff +#define OFDM_UCODE_TRIG_BASE_RX_BANDWIDTH 0x00007000 +#define OFDM_UCODE_TRIG_BASE_PS160 0x00008000 +#define OFDM_UCODE_EHT_TRIG_CONTROL_CHANNEL 0x000f0000 + __le32 tb_rx0; +#define OFDM_UCODE_TRIG_BASE_RX_MCS 0x0000000f +#define OFDM_UCODE_TRIG_BASE_RX_DCM 0x00000010 +#define OFDM_UCODE_TRIG_BASE_RX_GI_LTF_TYPE 
0x00000060 +#define OFDM_UCODE_TRIG_BASE_RX_NSTS 0x00000380 +#define OFDM_UCODE_TRIG_BASE_RX_CODING 0x00000400 +#define OFDM_UCODE_TRIG_BASE_RX_CODING_EXTRA_SYM 0x00000800 +#define OFDM_UCODE_TRIG_BASE_RX_STBC 0x00001000 +#define OFDM_UCODE_TRIG_BASE_RX_PRE_FEC_PAD_FACTOR 0x00006000 +#define OFDM_UCODE_TRIG_BASE_RX_PE_DISAMBIG 0x00008000 +#define OFDM_UCODE_TRIG_BASE_RX_DOPPLER 0x00010000 +#define OFDM_UCODE_TRIG_BASE_RX_RU 0x01fe0000 +#define OFDM_UCODE_TRIG_BASE_RX_RU_P80 0x00020000 +#define OFDM_UCODE_TRIG_BASE_RX_NUM_OF_LTF_SYM 0x0e000000 +#define OFDM_UCODE_TRIG_BASE_RX_LTF_PILOT_TYPE 0x10000000 +#define OFDM_UCODE_TRIG_BASE_RX_LOWEST_SS_ALLOCATION 0xe0000000 + __le32 tb_rx1; +}; + +struct iwl_eht_sigs { +#define OFDM_RX_FRAME_ENHANCED_WIFI_VER_ID 0x00000007 +#define OFDM_RX_FRAME_ENHANCED_WIFI_BANDWIDTH 0x00000038 +#define OFDM_RX_FRAME_ENHANCED_WIFI_UL_FLAG 0x00000040 +#define OFDM_RX_FRAME_ENHANCED_WIFI_BSS_COLOR 0x00001f80 +#define OFDM_RX_FRAME_ENHANCED_WIFI_TXOP_DURATION 0x000fe000 +#define OFDM_RX_FRAME_EHT_USIG1_DISREGARD 0x01f00000 +#define OFDM_RX_FRAME_EHT_USIG1_VALIDATE 0x02000000 +#define OFDM_RX_FRAME_EHT_BW320_SLOT 0x04000000 +#define OFDM_RX_FRAME_EHT_TYPE 0x18000000 +#define OFDM_RX_FRAME_ENHANCED_ER_NO_STREAMS 0x20000000 + __le32 usig_a1; +#define OFDM_RX_FRAME_EHT_PPDU_TYPE 0x00000003 +#define OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B2 0x00000004 +#define OFDM_RX_FRAME_EHT_PUNC_CHANNEL 0x000000f8 +#define OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B8 0x00000100 +#define OFDM_RX_FRAME_EHT_SIG_MCS 0x00000600 +#define OFDM_RX_FRAME_EHT_SIG_SYM_NUM 0x0000f800 +#define OFDM_RX_FRAME_EHT_TRIG_SPATIAL_REUSE_1 0x000f0000 +#define OFDM_RX_FRAME_EHT_TRIG_SPATIAL_REUSE_2 0x00f00000 +#define OFDM_RX_FRAME_EHT_TRIG_USIG2_DISREGARD 0x1f000000 +#define OFDM_RX_FRAME_EHT_TRIG_NO_STREAMS 0x20000000 +#define OFDM_RX_USIG_CRC_OK 0x40000000 + __le32 usig_a2_eht; +#define OFDM_RX_FRAME_EHT_SPATIAL_REUSE 0x0000000f +#define OFDM_RX_FRAME_EHT_GI_LTF_TYPE 0x00000030 +#define 
OFDM_RX_FRAME_EHT_NUM_OF_LTF_SYM 0x000001c0 +#define OFDM_RX_FRAME_EHT_CODING_EXTRA_SYM 0x00000200 +#define OFDM_RX_FRAME_EHT_PRE_FEC_PAD_FACTOR 0x00000c00 +#define OFDM_RX_FRAME_EHT_PE_DISAMBIG 0x00001000 +#define OFDM_RX_FRAME_EHT_USIG_OVF_DISREGARD 0x0001e000 +#define OFDM_RX_FRAME_EHT_NUM_OF_USERS 0x000e0000 +#define OFDM_RX_FRAME_EHT_NSTS 0x00f00000 +#define OFDM_RX_FRAME_EHT_BF 0x01000000 +#define OFDM_RX_FRAME_EHT_USIG_OVF_NDP_DISREGARD 0x06000000 +#define OFDM_RX_FRAME_EHTSIG_COMM_CC1_CRC_OK 0x08000000 +#define OFDM_RX_FRAME_EHTSIG_COMM_CC2_CRC_OK 0x10000000 +#define OFDM_RX_FRAME_EHT_NON_VALID_RU_ALLOC 0x20000000 +#define OFDM_RX_FRAME_EHT_NO_STREAMS 0x40000000 + __le32 b1; +#define OFDM_RX_FRAME_EHT_MATCH_ID_FOUND 0x00000001 +#define OFDM_RX_FRAME_EHT_ID_INDX 0x0000000e +#define OFDM_RX_FRAME_EHT_MCS 0x000000f0 +#define OFDM_RX_FRAME_EHT_CODING 0x00000100 +#define OFDM_RX_FRAME_EHT_SPATIAL_CONFIG 0x00007e00 +#define OFDM_RX_FRAME_EHT_STA_RU 0x007f8000 +#define OFDM_RX_FRAME_EHT_STA_RU_P80 0x00008000 +#define OFDM_RX_FRAME_EHT_STA_RU_PS160 0x00800000 +#define OFDM_RX_FRAME_EHT_USER_FIELD_CRC_OK 0x40000000 + __le32 b2; +#define OFDM_RX_FRAME_EHT_NUM_OF_DATA_SYM 0x000007ff +#define OFDM_RX_FRAME_EHT_PE_DURATION 0x00003800 +#define OFDM_RX_FRAME_EHT_NUM_OF_DATA_SYM_VALID 0x80000000 + __le32 sig2; +#define OFDM_RX_FRAME_EHT_RU_ALLOC_0_A1 0x000001ff +#define OFDM_RX_FRAME_EHT_RU_ALLOC_0_A2 0x0003fe00 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_0_A3 0x07fc0000 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_1_B1 0x000001ff +#define OFDM_RX_FRAME_EHT_RU_ALLOC_1_B2 0x0003fe00 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_1_B3 0x07fc0000 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_2_C1 0x000001ff +#define OFDM_RX_FRAME_EHT_RU_ALLOC_2_C2 0x0003fe00 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_2_C3 0x07fc0000 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_3_D1 0x000001ff +#define OFDM_RX_FRAME_EHT_RU_ALLOC_3_D2 0x0003fe00 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_3_D3 0x07fc0000 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_4_A4 0x000001ff 
+#define OFDM_RX_FRAME_EHT_RU_ALLOC_4_B4 0x0003fe00 +#define OFDM_RX_FRAME_EHT_RU_ALLOC_5_C4 0x000001ff +#define OFDM_RX_FRAME_EHT_RU_ALLOC_5_D4 0x0003fe00 + __le32 cmn[6]; +#define OFDM_RX_FRAME_EHT_USER_FIELD_ID 0x000007ff + __le32 user_id; +}; + +struct iwl_eht_tb_sigs { + /* same as non-TB above */ + __le32 usig_a1, usig_a2_eht; + /* same as HE TB above */ + __le32 tb_rx0, tb_rx1; +}; + +struct iwl_uhr_sigs { + __le32 usig_a1, usig_a1_uhr, usig_a2_uhr, b1, b2; + __le32 sig2; + __le32 cmn[6]; + __le32 user_id; +}; + +struct iwl_uhr_tb_sigs { + __le32 usig_a1, usig_a2_uhr, tb_rx0, tb_rx1; +}; + +struct iwl_uhr_elr_sigs { + __le32 usig_a1, usig_a2_uhr; + __le32 uhr_sig_elr1, uhr_sig_elr2; +}; + +union iwl_sigs { + struct iwl_ht_sigs ht; + struct iwl_vht_sigs vht; + struct iwl_he_sigs he; + struct iwl_he_tb_sigs he_tb; + struct iwl_eht_sigs eht; + struct iwl_eht_tb_sigs eht_tb; + struct iwl_uhr_sigs uhr; + struct iwl_uhr_tb_sigs uhr_tb; + struct iwl_uhr_elr_sigs uhr_elr; +}; + +enum iwl_sniffer_status { + IWL_SNIF_STAT_PLCP_RX_OK = 0, + IWL_SNIF_STAT_AID_NOT_FOR_US = 1, + IWL_SNIF_STAT_PLCP_RX_LSIG_ERR = 2, + IWL_SNIF_STAT_PLCP_RX_SIGA_ERR = 3, + IWL_SNIF_STAT_PLCP_RX_SIGB_ERR = 4, + IWL_SNIF_STAT_UNEXPECTED_TB = 5, + IWL_SNIF_STAT_UNSUPPORTED_RATE = 6, + IWL_SNIF_STAT_UNKNOWN_ERROR = 7, +}; /* AIR_SNIFFER_STATUS_E_VER_1 */ + +enum iwl_sniffer_flags { + IWL_SNIF_FLAG_VALID_TB_RX = BIT(0), + IWL_SNIF_FLAG_VALID_RU = BIT(1), +}; /* AIR_SNIFFER_FLAGS_E_VER_1 */ + +/** + * struct iwl_rx_phy_air_sniffer_ntfy - air sniffer notification + * + * @status: &enum iwl_sniffer_status + * @flags: &enum iwl_sniffer_flags + * @reserved1: reserved + * @rssi_a: energy chain-A in negative dBm, measured at FINA time + * @rssi_b: energy chain-B in negative dBm, measured at FINA time + * @channel: channel number + * @band: band information, PHY_BAND_* + * @on_air_rise_time: GP2 at on air rise + * @frame_time: frame time in us + * @rate: RATE_MCS_* + * @bytecount: byte count for legacy
and HT, otherwise number of symbols + * @legacy_sig: CCK signal information if %RATE_MCS_MOD_TYPE_MSK in @rate is + * %RATE_MCS_MOD_TYPE_CCK, otherwise OFDM signal information + * @sigs: PHY signal information, depending on %RATE_MCS_MOD_TYPE_MSK in @rate + * @reserved2: reserved + * + * Sent for every frame and before the normal RX command if data is included. + */ +struct iwl_rx_phy_air_sniffer_ntfy { + u8 status; + u8 flags; + u8 reserved1[2]; + u8 rssi_a, rssi_b; + u8 channel, band; + __le32 on_air_rise_time; + __le32 frame_time; + /* note: MCS in rate is not valid for MU-VHT */ + __le32 rate; + __le32 bytecount; + union iwl_legacy_sig legacy_sig; + union iwl_sigs sigs; + __le32 reserved2; +}; /* RX_PHY_AIR_SNIFFER_NTFY_API_S_VER_1 */ + #endif /* __iwl_fw_api_rx_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 5323c73ac8277..55b484c162807 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -397,11 +397,9 @@ static int iwl_mld_hw_verify_preconditions(struct iwl_mld *mld) TLC_MNG_UPDATE_NOTIF, 0) >= 4) + (iwl_fw_lookup_notif_ver(mld->fw, LEGACY_GROUP, REPLY_RX_MPDU_CMD, 0) >= 6) + - (iwl_fw_lookup_notif_ver(mld->fw, DATA_PATH_GROUP, - RX_NO_DATA_NOTIF, 0) >= 4) + (iwl_fw_lookup_notif_ver(mld->fw, LONG_GROUP, TX_CMD, 0) >= 9); - if (ratecheck != 0 && ratecheck != 5) { + if (ratecheck != 0 && ratecheck != 4) { IWL_ERR(mld, "Firmware has inconsistent rates\n"); return -EINVAL; } @@ -684,6 +682,8 @@ void iwl_mld_mac80211_remove_interface(struct ieee80211_hw *hw, #endif iwl_mld_rm_vif(mld, vif); + + mld->monitor.phy.valid = false; } struct iwl_mld_mc_iter_data { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index a6962256bdd12..8a4c96385640b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -259,6 
+259,7 @@ static const struct iwl_hcmd_names iwl_mld_data_path_names[] = { HCMD_NAME(MONITOR_NOTIF), HCMD_NAME(TLC_MNG_UPDATE_NOTIF), HCMD_NAME(BEACON_FILTER_IN_NOTIF), + HCMD_NAME(PHY_AIR_SNIFFER_NOTIF), HCMD_NAME(MU_GROUP_MGMT_NOTIF), }; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index ceda12c1672d9..22efe8e10f531 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -118,7 +118,11 @@ * @monitor.cur_bssid: current bssid tracked by the sniffer * @monitor.ptp_time: set the Rx mactime using the device's PTP clock time * @monitor.p80: primary channel position relative to he whole bandwidth, in - * steps of 80 MHz + * steps of 80 MHz + * @monitor.phy: PHY data information + * @monitor.phy.data: PHY data (&struct iwl_rx_phy_air_sniffer_ntfy) received + * @monitor.phy.valid: PHY data is valid (was received) + * @monitor.phy.used: PHY data was used by an RX * @fw_id_to_link_sta: maps a fw id of a sta to the corresponding * ieee80211_link_sta. This is not cleaned up on restart since we want to * preserve the fw sta ids during a restart (for SN/PN restoring). 
@@ -209,6 +213,10 @@ struct iwl_mld { u32 ampdu_ref; bool ampdu_toggle; u8 p80; + struct { + struct iwl_rx_phy_air_sniffer_ntfy data; + u8 valid:1, used:1; + } phy; #ifdef CONFIG_IWLWIFI_DEBUGFS __le16 cur_aid; u8 cur_bssid[ETH_ALEN]; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.c b/drivers/net/wireless/intel/iwlwifi/mld/notif.c index 884973d0b3440..4cf3920b005fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/notif.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.c @@ -589,8 +589,8 @@ void iwl_mld_rx(struct iwl_op_mode *op_mode, struct napi_struct *napi, else if (unlikely(cmd_id == WIDE_ID(DATA_PATH_GROUP, RX_QUEUES_NOTIFICATION))) iwl_mld_handle_rx_queues_sync_notif(mld, napi, pkt, 0); - else if (cmd_id == WIDE_ID(DATA_PATH_GROUP, RX_NO_DATA_NOTIF)) - iwl_mld_rx_monitor_no_data(mld, napi, pkt, 0); + else if (cmd_id == WIDE_ID(DATA_PATH_GROUP, PHY_AIR_SNIFFER_NOTIF)) + iwl_mld_handle_phy_air_sniffer_notif(mld, napi, pkt); else iwl_mld_rx_notif(mld, rxb, pkt); } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index 20d866dd92c2e..052a19bb85b45 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -18,41 +18,32 @@ /* stores relevant PHY data fields extracted from iwl_rx_mpdu_desc */ struct iwl_mld_rx_phy_data { - enum iwl_rx_phy_info_type info_type; - __le32 data0; - __le32 data1; - __le32 data2; - __le32 data3; - __le32 eht_data4; - __le32 data5; - __le16 data4; + struct iwl_rx_phy_air_sniffer_ntfy *ntfy; bool first_subframe; bool with_data; - __le32 rx_vec[4]; u32 rate_n_flags; u32 gp2_on_air_rise; + /* phy_info is only valid when we have a frame, i.e. 
with_data=true */ u16 phy_info; u8 energy_a, energy_b; }; static void -iwl_mld_fill_phy_data(struct iwl_mld *mld, - struct iwl_rx_mpdu_desc *desc, - struct iwl_mld_rx_phy_data *phy_data) +iwl_mld_fill_phy_data_from_mpdu(struct iwl_mld *mld, + struct iwl_rx_mpdu_desc *desc, + struct iwl_mld_rx_phy_data *phy_data) { + if (unlikely(mld->monitor.phy.valid)) { + mld->monitor.phy.used = true; + phy_data->ntfy = &mld->monitor.phy.data; + } + phy_data->phy_info = le16_to_cpu(desc->phy_info); phy_data->rate_n_flags = iwl_v3_rate_from_v2_v3(desc->v3.rate_n_flags, mld->fw_rates_ver_3); phy_data->gp2_on_air_rise = le32_to_cpu(desc->v3.gp2_on_air_rise); phy_data->energy_a = desc->v3.energy_a; phy_data->energy_b = desc->v3.energy_b; - phy_data->data0 = desc->v3.phy_data0; - phy_data->data1 = desc->v3.phy_data1; - phy_data->data2 = desc->v3.phy_data2; - phy_data->data3 = desc->v3.phy_data3; - phy_data->data4 = desc->phy_data4; - phy_data->eht_data4 = desc->phy_eht_data4; - phy_data->data5 = desc->v3.phy_data5; phy_data->with_data = true; } @@ -217,26 +208,19 @@ static void iwl_mld_fill_signal(struct iwl_mld *mld, int link_id, } static void -iwl_mld_decode_he_phy_ru_alloc(struct iwl_mld_rx_phy_data *phy_data, - struct ieee80211_radiotap_he *he, - struct ieee80211_radiotap_he_mu *he_mu, - struct ieee80211_rx_status *rx_status) +iwl_mld_he_set_ru_alloc(struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_he *he, + u8 ru_with_p80) { - /* Unfortunately, we have to leave the mac80211 data - * incorrect for the case that we receive an HE-MU - * transmission and *don't* have the HE phy data (due - * to the bits being used for TSF). This shouldn't - * happen though as management frames where we need - * the TSF/timers are not be transmitted in HE-MU. 
- */ - u8 ru = le32_get_bits(phy_data->data1, IWL_RX_PHY_DATA1_HE_RU_ALLOC_MASK); - u32 rate_n_flags = phy_data->rate_n_flags; - u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; + u8 ru = ru_with_p80 >> 1; + u8 p80 = ru_with_p80 & 1; u8 offs = 0; rx_status->bw = RATE_INFO_BW_HE_RU; he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN); + he->data2 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET_KNOWN); switch (ru) { case 0 ... 36: @@ -266,227 +250,262 @@ iwl_mld_decode_he_phy_ru_alloc(struct iwl_mld_rx_phy_data *phy_data, rx_status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_2x996; break; } + he->data2 |= le16_encode_bits(offs, IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET); - he->data2 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_KNOWN | - IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET_KNOWN); - if (phy_data->data1 & cpu_to_le32(IWL_RX_PHY_DATA1_HE_RU_ALLOC_SEC80)) - he->data2 |= - cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_SEC); - -#define CHECK_BW(bw) \ - BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_ ## bw ## MHZ != \ - RATE_MCS_CHAN_WIDTH_##bw >> RATE_MCS_CHAN_WIDTH_POS); \ - BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_ ## bw ## MHZ != \ - RATE_MCS_CHAN_WIDTH_##bw >> RATE_MCS_CHAN_WIDTH_POS) - CHECK_BW(20); - CHECK_BW(40); - CHECK_BW(80); - CHECK_BW(160); - if (he_mu) - he_mu->flags2 |= - le16_encode_bits(u32_get_bits(rate_n_flags, - RATE_MCS_CHAN_WIDTH_MSK), - IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW); - else if (he_type == RATE_MCS_HE_TYPE_TRIG) - he->data6 |= - cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN) | - le16_encode_bits(u32_get_bits(rate_n_flags, - RATE_MCS_CHAN_WIDTH_MSK), - IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW); + he->data2 |= le16_encode_bits(p80, IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_SEC); } +#define RTAP_ENC_HE(src, src_msk, dst_msk) \ + le16_encode_bits(le32_get_bits(src, src_msk), dst_msk) + static void 
-iwl_mld_decode_he_mu_ext(struct iwl_mld_rx_phy_data *phy_data, - struct ieee80211_radiotap_he_mu *he_mu) +iwl_mld_decode_he_mu(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_radiotap_he *he, + struct ieee80211_radiotap_he_mu *he_mu, + struct ieee80211_rx_status *rx_status) { - u32 phy_data2 = le32_to_cpu(phy_data->data2); - u32 phy_data3 = le32_to_cpu(phy_data->data3); - u16 phy_data4 = le16_to_cpu(phy_data->data4); u32 rate_n_flags = phy_data->rate_n_flags; - if (u32_get_bits(phy_data4, IWL_RX_PHY_DATA4_HE_MU_EXT_CH1_CRC_OK)) { + he_mu->flags1 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.b, + OFDM_RX_FRAME_HE_SIGB_DCM, + IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM); + he_mu->flags1 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.b, + OFDM_RX_FRAME_HE_SIGB_MCS, + IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS); + he_mu->flags2 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a1, + OFDM_RX_FRAME_HE_PRMBL_PUNC_TYPE, + IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW); + he_mu->flags2 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_MU_NUM_OF_SIGB_SYM_OR_USER_NUM, + IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_SYMS_USERS); + he_mu->flags2 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.b, + OFDM_RX_FRAME_HE_MU_SIGB_COMP, + IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_COMP); + + if (phy_data->ntfy->flags & IWL_SNIF_FLAG_VALID_RU && + le32_get_bits(phy_data->ntfy->sigs.he.cmn[2], + OFDM_RX_FRAME_HE_COMMON_CC1_CRC_OK)) { he_mu->flags1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_RU_KNOWN | IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU_KNOWN); he_mu->flags1 |= - le16_encode_bits(u32_get_bits(phy_data4, - IWL_RX_PHY_DATA4_HE_MU_EXT_CH1_CTR_RU), - IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU); - - he_mu->ru_ch1[0] = u32_get_bits(phy_data2, - IWL_RX_PHY_DATA2_HE_MU_EXT_CH1_RU0); - he_mu->ru_ch1[1] = u32_get_bits(phy_data3, - IWL_RX_PHY_DATA3_HE_MU_EXT_CH1_RU1); - he_mu->ru_ch1[2] = u32_get_bits(phy_data2, - IWL_RX_PHY_DATA2_HE_MU_EXT_CH1_RU2); - he_mu->ru_ch1[3] = u32_get_bits(phy_data3, - 
IWL_RX_PHY_DATA3_HE_MU_EXT_CH1_RU3); + RTAP_ENC_HE(phy_data->ntfy->sigs.he.cmn[2], + OFDM_RX_FRAME_HE_CENTER_RU_CC1, + IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU); + + he_mu->ru_ch1[0] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[0], + OFDM_RX_FRAME_HE_RU_ALLOC_0_A1); + he_mu->ru_ch1[1] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[1], + OFDM_RX_FRAME_HE_RU_ALLOC_1_C1); + he_mu->ru_ch1[2] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[0], + OFDM_RX_FRAME_HE_RU_ALLOC_0_A2); + he_mu->ru_ch1[3] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[1], + OFDM_RX_FRAME_HE_RU_ALLOC_1_C2); } - if (u32_get_bits(phy_data4, IWL_RX_PHY_DATA4_HE_MU_EXT_CH2_CRC_OK) && + if (phy_data->ntfy->flags & IWL_SNIF_FLAG_VALID_RU && + le32_get_bits(phy_data->ntfy->sigs.he.cmn[2], + OFDM_RX_FRAME_HE_COMMON_CC2_CRC_OK) && (rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK) != RATE_MCS_CHAN_WIDTH_20) { he_mu->flags1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_RU_KNOWN | IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_CTR_26T_RU_KNOWN); he_mu->flags2 |= - le16_encode_bits(u32_get_bits(phy_data4, - IWL_RX_PHY_DATA4_HE_MU_EXT_CH2_CTR_RU), - IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU); - - he_mu->ru_ch2[0] = u32_get_bits(phy_data2, - IWL_RX_PHY_DATA2_HE_MU_EXT_CH2_RU0); - he_mu->ru_ch2[1] = u32_get_bits(phy_data3, - IWL_RX_PHY_DATA3_HE_MU_EXT_CH2_RU1); - he_mu->ru_ch2[2] = u32_get_bits(phy_data2, - IWL_RX_PHY_DATA2_HE_MU_EXT_CH2_RU2); - he_mu->ru_ch2[3] = u32_get_bits(phy_data3, - IWL_RX_PHY_DATA3_HE_MU_EXT_CH2_RU3); + RTAP_ENC_HE(phy_data->ntfy->sigs.he.cmn[2], + OFDM_RX_FRAME_HE_CENTER_RU_CC2, + IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU); + + he_mu->ru_ch2[0] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[0], + OFDM_RX_FRAME_HE_RU_ALLOC_0_B1); + he_mu->ru_ch2[1] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[1], + OFDM_RX_FRAME_HE_RU_ALLOC_1_D1); + he_mu->ru_ch2[2] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[0], + OFDM_RX_FRAME_HE_RU_ALLOC_0_B2); + he_mu->ru_ch2[3] = le32_get_bits(phy_data->ntfy->sigs.he.cmn[1], + 
OFDM_RX_FRAME_HE_RU_ALLOC_1_D2); } + +#define CHECK_BW(bw) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_ ## bw ## MHZ != \ + RATE_MCS_CHAN_WIDTH_##bw >> RATE_MCS_CHAN_WIDTH_POS) + CHECK_BW(20); + CHECK_BW(40); + CHECK_BW(80); + CHECK_BW(160); +#undef CHECK_BW + + he_mu->flags2 |= + le16_encode_bits(u32_get_bits(rate_n_flags, RATE_MCS_CHAN_WIDTH_MSK), + IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW); + + iwl_mld_he_set_ru_alloc(rx_status, he, + le32_get_bits(phy_data->ntfy->sigs.he.b, + OFDM_RX_FRAME_HE_SIGB_STA_RU)); +} + +static void +iwl_mld_decode_he_tb_phy_data(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_radiotap_he *he, + struct ieee80211_rx_status *rx_status) +{ + u32 rate_n_flags = phy_data->rate_n_flags; + u32 nsts; + + he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE2_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE3_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE4_KNOWN); + + he->data4 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.a1, + OFDM_RX_HE_TRIG_SPATIAL_REUSE_1, + IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE1); + he->data4 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.a1, + OFDM_RX_HE_TRIG_SPATIAL_REUSE_2, + IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE2); + he->data4 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.a1, + OFDM_RX_HE_TRIG_SPATIAL_REUSE_3, + IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE3); + he->data4 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.a1, + OFDM_RX_HE_TRIG_SPATIAL_REUSE_4, + IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE4); + he->data3 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.a1, + OFDM_RX_HE_TRIG_BSS_COLOR, + IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR); + +#define CHECK_BW(bw) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_ ## bw ## MHZ != \ + RATE_MCS_CHAN_WIDTH_##bw >> RATE_MCS_CHAN_WIDTH_POS) + CHECK_BW(20); + CHECK_BW(40); + CHECK_BW(80); + CHECK_BW(160); +#undef CHECK_BW + + he->data6 |= 
cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN) | + le16_encode_bits(u32_get_bits(rate_n_flags, RATE_MCS_CHAN_WIDTH_MSK), + IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW); + + if (!(phy_data->ntfy->flags & IWL_SNIF_FLAG_VALID_TB_RX)) + return; + + he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_LDPC_XSYMSEG_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_DOPPLER_KNOWN); + he->data2 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRE_FEC_PAD_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_PE_DISAMBIG_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_TXOP_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_NUM_LTF_SYMS_KNOWN); + + he->data3 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_CODING_EXTRA_SYM, + IEEE80211_RADIOTAP_HE_DATA3_LDPC_XSYMSEG); + he->data6 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_DOPPLER, + IEEE80211_RADIOTAP_HE_DATA6_DOPPLER); + he->data5 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_PRE_FEC_PAD_FACTOR, + IEEE80211_RADIOTAP_HE_DATA5_PRE_FEC_PAD); + he->data5 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_PE_DISAMBIG, + IEEE80211_RADIOTAP_HE_DATA5_PE_DISAMBIG); + he->data5 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_NUM_OF_LTF_SYM, + IEEE80211_RADIOTAP_HE_DATA5_NUM_LTF_SYMS); + he->data6 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he_tb.a2, + OFDM_RX_HE_TRIG_TXOP_DURATION, + IEEE80211_RADIOTAP_HE_DATA6_TXOP); + + iwl_mld_he_set_ru_alloc(rx_status, he, + le32_get_bits(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_RU)); + + nsts = le32_get_bits(phy_data->ntfy->sigs.he_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_NSTS) + 1; + rx_status->nss = nsts >> !!(rate_n_flags & RATE_MCS_STBC_MSK); } static void iwl_mld_decode_he_phy_data(struct iwl_mld_rx_phy_data *phy_data, struct ieee80211_radiotap_he *he, struct ieee80211_radiotap_he_mu *he_mu, - struct ieee80211_rx_status *rx_status, - int queue) + struct ieee80211_rx_status *rx_status) { - 
switch (phy_data->info_type) { - case IWL_RX_PHY_INFO_TYPE_NONE: - case IWL_RX_PHY_INFO_TYPE_CCK: - case IWL_RX_PHY_INFO_TYPE_OFDM_LGCY: - case IWL_RX_PHY_INFO_TYPE_HT: - case IWL_RX_PHY_INFO_TYPE_VHT_SU: - case IWL_RX_PHY_INFO_TYPE_VHT_MU: - case IWL_RX_PHY_INFO_TYPE_EHT_MU: - case IWL_RX_PHY_INFO_TYPE_EHT_TB: - case IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT: - case IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT: - return; - case IWL_RX_PHY_INFO_TYPE_HE_TB_EXT: - he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN | - IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE2_KNOWN | - IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE3_KNOWN | - IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE4_KNOWN); - he->data4 |= le16_encode_bits(le32_get_bits(phy_data->data2, - IWL_RX_PHY_DATA2_HE_TB_EXT_SPTL_REUSE1), - IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE1); - he->data4 |= le16_encode_bits(le32_get_bits(phy_data->data2, - IWL_RX_PHY_DATA2_HE_TB_EXT_SPTL_REUSE2), - IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE2); - he->data4 |= le16_encode_bits(le32_get_bits(phy_data->data2, - IWL_RX_PHY_DATA2_HE_TB_EXT_SPTL_REUSE3), - IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE3); - he->data4 |= le16_encode_bits(le32_get_bits(phy_data->data2, - IWL_RX_PHY_DATA2_HE_TB_EXT_SPTL_REUSE4), - IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE4); - fallthrough; - case IWL_RX_PHY_INFO_TYPE_HE_SU: - case IWL_RX_PHY_INFO_TYPE_HE_MU: - case IWL_RX_PHY_INFO_TYPE_HE_MU_EXT: - case IWL_RX_PHY_INFO_TYPE_HE_TB: - /* HE common */ - he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_LDPC_XSYMSEG_KNOWN | - IEEE80211_RADIOTAP_HE_DATA1_DOPPLER_KNOWN | - IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN); - he->data2 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRE_FEC_PAD_KNOWN | - IEEE80211_RADIOTAP_HE_DATA2_PE_DISAMBIG_KNOWN | - IEEE80211_RADIOTAP_HE_DATA2_TXOP_KNOWN | - IEEE80211_RADIOTAP_HE_DATA2_NUM_LTF_SYMS_KNOWN); - he->data3 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_BSS_COLOR_MASK), - IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR); - if 
(phy_data->info_type != IWL_RX_PHY_INFO_TYPE_HE_TB && - phy_data->info_type != IWL_RX_PHY_INFO_TYPE_HE_TB_EXT) { - he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_UL_DL_KNOWN); - he->data3 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_UPLINK), - IEEE80211_RADIOTAP_HE_DATA3_UL_DL); - } - he->data3 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_LDPC_EXT_SYM), - IEEE80211_RADIOTAP_HE_DATA3_LDPC_XSYMSEG); - he->data5 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_PRE_FEC_PAD_MASK), - IEEE80211_RADIOTAP_HE_DATA5_PRE_FEC_PAD); - he->data5 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_PE_DISAMBIG), - IEEE80211_RADIOTAP_HE_DATA5_PE_DISAMBIG); - he->data5 |= le16_encode_bits(le32_get_bits(phy_data->data1, - IWL_RX_PHY_DATA1_HE_LTF_NUM_MASK), - IEEE80211_RADIOTAP_HE_DATA5_NUM_LTF_SYMS); - he->data6 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_TXOP_DUR_MASK), - IEEE80211_RADIOTAP_HE_DATA6_TXOP); - he->data6 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_DOPPLER), - IEEE80211_RADIOTAP_HE_DATA6_DOPPLER); - break; - } + u32 rate_n_flags = phy_data->rate_n_flags; + u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; + u32 nsts; - switch (phy_data->info_type) { - case IWL_RX_PHY_INFO_TYPE_HE_MU_EXT: - case IWL_RX_PHY_INFO_TYPE_HE_MU: - case IWL_RX_PHY_INFO_TYPE_HE_SU: - he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN); - he->data4 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_SPATIAL_REUSE_MASK), - IEEE80211_RADIOTAP_HE_DATA4_SU_MU_SPTL_REUSE); - break; - default: - /* nothing here */ - break; - } + switch (he_type) { + case RATE_MCS_HE_TYPE_TRIG: + iwl_mld_decode_he_tb_phy_data(phy_data, he, rx_status); + /* that's it, below is only for SU/MU */ + return; + case RATE_MCS_HE_TYPE_MU: + iwl_mld_decode_he_mu(phy_data, he, he_mu, rx_status); - switch (phy_data->info_type) { 
- case IWL_RX_PHY_INFO_TYPE_HE_MU_EXT: - he_mu->flags1 |= - le16_encode_bits(le16_get_bits(phy_data->data4, - IWL_RX_PHY_DATA4_HE_MU_EXT_SIGB_DCM), - IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM); - he_mu->flags1 |= - le16_encode_bits(le16_get_bits(phy_data->data4, - IWL_RX_PHY_DATA4_HE_MU_EXT_SIGB_MCS_MASK), - IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS); - he_mu->flags2 |= - le16_encode_bits(le16_get_bits(phy_data->data4, - IWL_RX_PHY_DATA4_HE_MU_EXT_PREAMBLE_PUNC_TYPE_MASK), - IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW); - iwl_mld_decode_he_mu_ext(phy_data, he_mu); - fallthrough; - case IWL_RX_PHY_INFO_TYPE_HE_MU: - he_mu->flags2 |= - le16_encode_bits(le32_get_bits(phy_data->data1, - IWL_RX_PHY_DATA1_HE_MU_SIBG_SYM_OR_USER_NUM_MASK), - IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_SYMS_USERS); - he_mu->flags2 |= - le16_encode_bits(le32_get_bits(phy_data->data1, - IWL_RX_PHY_DATA1_HE_MU_SIGB_COMPRESSION), - IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_COMP); - fallthrough; - case IWL_RX_PHY_INFO_TYPE_HE_TB: - case IWL_RX_PHY_INFO_TYPE_HE_TB_EXT: - iwl_mld_decode_he_phy_ru_alloc(phy_data, he, he_mu, rx_status); + nsts = le32_get_bits(phy_data->ntfy->sigs.he.b, + OFDM_RX_FRAME_HE_SIGB_NSTS) + 1; break; - case IWL_RX_PHY_INFO_TYPE_HE_SU: + case RATE_MCS_HE_TYPE_SU: + case RATE_MCS_HE_TYPE_EXT_SU: he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_BEAM_CHANGE_KNOWN); - he->data3 |= le16_encode_bits(le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_HE_BEAM_CHNG), - IEEE80211_RADIOTAP_HE_DATA3_BEAM_CHANGE); - break; - default: - /* nothing */ + he->data3 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a1, + OFDM_RX_FRAME_HE_BEAM_CHANGE, + IEEE80211_RADIOTAP_HE_DATA3_BEAM_CHANGE); + + nsts = le32_get_bits(phy_data->ntfy->sigs.he.a1, + OFDM_RX_FRAME_HE_NSTS) + 1; break; } + + rx_status->nss = nsts >> !!(rate_n_flags & RATE_MCS_STBC_MSK); + + he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_LDPC_XSYMSEG_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_DOPPLER_KNOWN); + he->data2 |= 
cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRE_FEC_PAD_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_PE_DISAMBIG_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_TXOP_KNOWN | + IEEE80211_RADIOTAP_HE_DATA2_NUM_LTF_SYMS_KNOWN); + + he->data3 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_CODING_EXTRA_SYM, + IEEE80211_RADIOTAP_HE_DATA3_LDPC_XSYMSEG); + he->data5 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_PRE_FEC_PAD_FACTOR, + IEEE80211_RADIOTAP_HE_DATA5_PRE_FEC_PAD); + he->data5 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_PE_DISAMBIG, + IEEE80211_RADIOTAP_HE_DATA5_PE_DISAMBIG); + he->data5 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_MU_NUM_OF_LTF_SYM, + IEEE80211_RADIOTAP_HE_DATA5_NUM_LTF_SYMS); + he->data6 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_TXOP_DURATION, + IEEE80211_RADIOTAP_HE_DATA6_TXOP); + he->data6 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a2, + OFDM_RX_FRAME_HE_DOPPLER, + IEEE80211_RADIOTAP_HE_DATA6_DOPPLER); + + he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_UL_DL_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN); + + he->data3 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a1, + OFDM_RX_FRAME_HE_BSS_COLOR, + IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR); + he->data3 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a1, + OFDM_RX_FRAME_HE_UL_FLAG, + IEEE80211_RADIOTAP_HE_DATA3_UL_DL); + he->data4 |= RTAP_ENC_HE(phy_data->ntfy->sigs.he.a1, + OFDM_RX_FRAME_HE_SPATIAL_REUSE, + IEEE80211_RADIOTAP_HE_DATA4_SU_MU_SPTL_REUSE); } -static void iwl_mld_rx_he(struct iwl_mld *mld, struct sk_buff *skb, - struct iwl_mld_rx_phy_data *phy_data, - int queue) +static void iwl_mld_rx_he(struct sk_buff *skb, + struct iwl_mld_rx_phy_data *phy_data) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_he *he = NULL; @@ -510,48 +529,28 @@ static void iwl_mld_rx_he(struct iwl_mld *mld, struct sk_buff *skb, .flags2 = 
cpu_to_le16(IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW_KNOWN | IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_KNOWN), }; - u16 phy_info = phy_data->phy_info; he = skb_put_data(skb, &known, sizeof(known)); rx_status->flag |= RX_FLAG_RADIOTAP_HE; - if (phy_data->info_type == IWL_RX_PHY_INFO_TYPE_HE_MU || - phy_data->info_type == IWL_RX_PHY_INFO_TYPE_HE_MU_EXT) { - he_mu = skb_put_data(skb, &mu_known, sizeof(mu_known)); - rx_status->flag |= RX_FLAG_RADIOTAP_HE_MU; - } - - /* report the AMPDU-EOF bit on single frames */ - if (!queue && !(phy_info & IWL_RX_MPDU_PHY_AMPDU)) { - rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->data0 & cpu_to_le32(IWL_RX_PHY_DATA0_HE_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } - - if (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) - iwl_mld_decode_he_phy_data(phy_data, he, he_mu, rx_status, - queue); - - /* update aggregation data for monitor sake on default queue */ - if (!queue && (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) && - (phy_info & IWL_RX_MPDU_PHY_AMPDU) && phy_data->first_subframe) { - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->data0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } - - if (he_type == RATE_MCS_HE_TYPE_EXT_SU && - rate_n_flags & RATE_MCS_HE_106T_MSK) { - rx_status->bw = RATE_INFO_BW_HE_RU; - rx_status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_106; - } - - /* actually data is filled in mac80211 */ - if (he_type == RATE_MCS_HE_TYPE_SU || - he_type == RATE_MCS_HE_TYPE_EXT_SU) + switch (he_type) { + case RATE_MCS_HE_TYPE_EXT_SU: + /* + * Except for this special case we won't have + * HE RU allocation info outside of monitor mode + * since we don't get the PHY notif. 
+ */ + if (rate_n_flags & RATE_MCS_HE_106T_MSK) { + rx_status->bw = RATE_INFO_BW_HE_RU; + rx_status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_106; + } + fallthrough; + case RATE_MCS_HE_TYPE_SU: + /* actual data is filled in mac80211 */ he->data1 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN); + break; + } #define CHECK_TYPE(F) \ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_ ## F != \ @@ -567,8 +566,7 @@ static void iwl_mld_rx_he(struct iwl_mld *mld, struct sk_buff *skb, if (rate_n_flags & RATE_MCS_BF_MSK) he->data5 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA5_TXBF); - switch ((rate_n_flags & RATE_MCS_HE_GI_LTF_MSK) >> - RATE_MCS_HE_GI_LTF_POS) { + switch (u32_get_bits(rate_n_flags, RATE_MCS_HE_GI_LTF_MSK)) { case 0: if (he_type == RATE_MCS_HE_TYPE_TRIG) rx_status->he_gi = NL80211_RATE_INFO_HE_GI_1_6; @@ -609,37 +607,52 @@ static void iwl_mld_rx_he(struct iwl_mld *mld, struct sk_buff *skb, he->data5 |= le16_encode_bits(ltf, IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE); + + if (likely(!phy_data->ntfy)) + return; + + if (he_type == RATE_MCS_HE_TYPE_MU) { + he_mu = skb_put_data(skb, &mu_known, sizeof(mu_known)); + rx_status->flag |= RX_FLAG_RADIOTAP_HE_MU; + } + + iwl_mld_decode_he_phy_data(phy_data, he, he_mu, rx_status); } static void iwl_mld_decode_lsig(struct sk_buff *skb, struct iwl_mld_rx_phy_data *phy_data) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + u32 format = phy_data->rate_n_flags & RATE_MCS_MOD_TYPE_MSK; struct ieee80211_radiotap_lsig *lsig; + u32 lsig_len, rate; - switch (phy_data->info_type) { - case IWL_RX_PHY_INFO_TYPE_HT: - case IWL_RX_PHY_INFO_TYPE_VHT_SU: - case IWL_RX_PHY_INFO_TYPE_VHT_MU: - case IWL_RX_PHY_INFO_TYPE_HE_TB_EXT: - case IWL_RX_PHY_INFO_TYPE_HE_SU: - case IWL_RX_PHY_INFO_TYPE_HE_MU: - case IWL_RX_PHY_INFO_TYPE_HE_MU_EXT: - case IWL_RX_PHY_INFO_TYPE_HE_TB: - case IWL_RX_PHY_INFO_TYPE_EHT_MU: - case IWL_RX_PHY_INFO_TYPE_EHT_TB: - case IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT: - case IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT: 
- lsig = skb_put(skb, sizeof(*lsig)); - lsig->data1 = cpu_to_le16(IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN); - lsig->data2 = le16_encode_bits(le32_get_bits(phy_data->data1, - IWL_RX_PHY_DATA1_LSIG_LEN_MASK), - IEEE80211_RADIOTAP_LSIG_DATA2_LENGTH); - rx_status->flag |= RX_FLAG_RADIOTAP_LSIG; - break; - default: - break; - } + if (likely(!phy_data->ntfy)) + return; + + /* + * Technically legacy CCK/OFDM frames don't have an L-SIG + * since that's the compat format for HT (non-greenfield) + * and up. However, it's meant to be compatible with the + * LENGTH and RATE fields in Clause 17 and 18 OFDM frames + * so include the field for any non-CCK frame. For CCK it + * cannot work, since the LENGTH field for them is 16-bit + * and the radiotap field only has 12 bits. + */ + if (format == RATE_MCS_MOD_TYPE_CCK) + return; + + lsig_len = le32_get_bits(phy_data->ntfy->legacy_sig.ofdm, + OFDM_RX_LEGACY_LENGTH); + rate = le32_get_bits(phy_data->ntfy->legacy_sig.ofdm, OFDM_RX_RATE); + + lsig = skb_put(skb, sizeof(*lsig)); + lsig->data1 = cpu_to_le16(IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN) | + cpu_to_le16(IEEE80211_RADIOTAP_LSIG_DATA1_RATE_KNOWN); + lsig->data2 = le16_encode_bits(lsig_len, + IEEE80211_RADIOTAP_LSIG_DATA2_LENGTH) | + le16_encode_bits(rate, IEEE80211_RADIOTAP_LSIG_DATA2_RATE); + rx_status->flag |= RX_FLAG_RADIOTAP_LSIG; } /* Put a TLV on the skb and return data pointer @@ -667,209 +680,144 @@ iwl_mld_radiotap_put_tlv(struct sk_buff *skb, u16 type, u16 len) (_usig)->value |= LE32_DEC_ENC(in_value, dec_bits, _enc_bits); \ } while (0) -#define __IWL_MLD_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \ - eht->data[(rt_data)] |= \ - (cpu_to_le32 \ - (IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru ## _KNOWN) | \ - LE32_DEC_ENC(data ## fw_data, \ - IWL_RX_PHY_DATA ## fw_data ## _EHT_MU_EXT_RU_ALLOC_ ## fw_ru, \ - IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru)) +static void iwl_mld_decode_eht_usig_tb(struct iwl_mld_rx_phy_data 
*phy_data, + struct ieee80211_radiotap_eht_usig *usig) +{ + __le32 usig_a1 = phy_data->ntfy->sigs.eht_tb.usig_a1; + __le32 usig_a2 = phy_data->ntfy->sigs.eht_tb.usig_a2_eht; + + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a1, + OFDM_RX_FRAME_EHT_USIG1_DISREGARD, + IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_PPDU_TYPE, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B2, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_TRIG_SPATIAL_REUSE_1, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_TRIG_SPATIAL_REUSE_2, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_TRIG_USIG2_DISREGARD, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD); +} -#define _IWL_MLD_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \ - __IWL_MLD_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) +static void iwl_mld_decode_eht_usig_non_tb(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_radiotap_eht_usig *usig) +{ + __le32 usig_a1 = phy_data->ntfy->sigs.eht.usig_a1; + __le32 usig_a2 = phy_data->ntfy->sigs.eht.usig_a2_eht; + + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a1, + OFDM_RX_FRAME_EHT_USIG1_DISREGARD, + IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a1, + OFDM_RX_FRAME_EHT_USIG1_VALIDATE, + IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_PPDU_TYPE, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B2, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_PUNC_CHANNEL, + 
IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B8, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_SIG_MCS, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS); + IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, + OFDM_RX_FRAME_EHT_SIG_SYM_NUM, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS); +} -#define IEEE80211_RADIOTAP_RU_DATA_1_1_1 1 -#define IEEE80211_RADIOTAP_RU_DATA_2_1_1 2 -#define IEEE80211_RADIOTAP_RU_DATA_1_1_2 2 -#define IEEE80211_RADIOTAP_RU_DATA_2_1_2 2 -#define IEEE80211_RADIOTAP_RU_DATA_1_2_1 3 -#define IEEE80211_RADIOTAP_RU_DATA_2_2_1 3 -#define IEEE80211_RADIOTAP_RU_DATA_1_2_2 3 -#define IEEE80211_RADIOTAP_RU_DATA_2_2_2 4 +static void iwl_mld_decode_eht_usig(struct iwl_mld_rx_phy_data *phy_data, + struct sk_buff *skb) +{ + u32 he_type = phy_data->rate_n_flags & RATE_MCS_HE_TYPE_MSK; + __le32 usig_a1 = phy_data->ntfy->sigs.eht.usig_a1; + __le32 usig_a2 = phy_data->ntfy->sigs.eht.usig_a2_eht; + struct ieee80211_radiotap_eht_usig *usig; + u32 bw; -#define IWL_RX_RU_DATA_A1 2 -#define IWL_RX_RU_DATA_A2 2 -#define IWL_RX_RU_DATA_B1 2 -#define IWL_RX_RU_DATA_B2 4 -#define IWL_RX_RU_DATA_C1 3 -#define IWL_RX_RU_DATA_C2 3 -#define IWL_RX_RU_DATA_D1 4 -#define IWL_RX_RU_DATA_D2 4 + usig = iwl_mld_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT_USIG, + sizeof(*usig)); -#define IWL_MLD_ENC_EHT_RU(rt_ru, fw_ru) \ - _IWL_MLD_ENC_EHT_RU(IEEE80211_RADIOTAP_RU_DATA_ ## rt_ru, \ - rt_ru, \ - IWL_RX_RU_DATA_ ## fw_ru, \ - fw_ru) + BUILD_BUG_ON(offsetof(union iwl_sigs, eht.usig_a1) != + offsetof(union iwl_sigs, eht_tb.usig_a1)); + BUILD_BUG_ON(offsetof(union iwl_sigs, eht.usig_a2_eht) != + offsetof(union iwl_sigs, eht_tb.usig_a2_eht)); -static void iwl_mld_decode_eht_ext_mu(struct iwl_mld *mld, - struct iwl_mld_rx_phy_data *phy_data, - struct ieee80211_rx_status *rx_status, - struct ieee80211_radiotap_eht *eht, - 
struct ieee80211_radiotap_eht_usig *usig) -{ - if (phy_data->with_data) { - __le32 data1 = phy_data->data1; - __le32 data2 = phy_data->data2; - __le32 data3 = phy_data->data3; - __le32 data4 = phy_data->eht_data4; - __le32 data5 = phy_data->data5; - u32 phy_bw = phy_data->rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK; - - IWL_MLD_ENC_USIG_VALUE_MASK(usig, data5, - IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, data5, - IWL_RX_PHY_DATA5_EHT_MU_PUNC_CH_CODE, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, data4, - IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS); - IWL_MLD_ENC_USIG_VALUE_MASK - (usig, data1, IWL_RX_PHY_DATA1_EHT_MU_NUM_SIG_SYM_USIGA2, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS); + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN | + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN | + IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_CHECKED | + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN | + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN); - eht->user_info[0] |= - cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN) | - LE32_DEC_ENC(data5, IWL_RX_PHY_DATA5_EHT_MU_STA_ID_USR, - IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID); +#define CHECK_BW(bw) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_ ## bw ## MHZ != \ + RATE_MCS_CHAN_WIDTH_ ## bw ## _VAL) + CHECK_BW(20); + CHECK_BW(40); + CHECK_BW(80); + CHECK_BW(160); +#undef CHECK_BW + BUILD_BUG_ON(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_1 != + RATE_MCS_CHAN_WIDTH_320_VAL); + bw = u32_get_bits(phy_data->rate_n_flags, RATE_MCS_CHAN_WIDTH_MSK); + /* specific handling for 320MHz-1/320MHz-2 */ + if (bw == RATE_MCS_CHAN_WIDTH_320_VAL) + bw += le32_get_bits(usig_a1, OFDM_RX_FRAME_EHT_BW320_SLOT); + usig->common |= le32_encode_bits(bw, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW); - eht->known |= 
cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M); - eht->data[7] |= LE32_DEC_ENC - (data5, IWL_RX_PHY_DATA5_EHT_MU_NUM_USR_NON_OFDMA, - IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS); + usig->common |= LE32_DEC_ENC(usig_a1, OFDM_RX_FRAME_ENHANCED_WIFI_UL_FLAG, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); + usig->common |= LE32_DEC_ENC(usig_a1, OFDM_RX_FRAME_ENHANCED_WIFI_BSS_COLOR, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR); - /* - * Hardware labels the content channels/RU allocation values - * as follows: - * Content Channel 1 Content Channel 2 - * 20 MHz: A1 - * 40 MHz: A1 B1 - * 80 MHz: A1 C1 B1 D1 - * 160 MHz: A1 C1 A2 C2 B1 D1 B2 D2 - * 320 MHz: A1 C1 A2 C2 A3 C3 A4 C4 B1 D1 B2 D2 B3 D3 B4 D4 - * - * However firmware can only give us A1-D2, so the higher - * frequencies are missing. - */ + if (le32_get_bits(usig_a1, OFDM_RX_FRAME_EHT_USIG1_VALIDATE) && + le32_get_bits(usig_a2, OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B2) && + le32_get_bits(usig_a2, OFDM_RX_FRAME_EHT_USIG2_VALIDATE_B8)) + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK); - switch (phy_bw) { - case RATE_MCS_CHAN_WIDTH_320: - /* additional values are missing in RX metadata */ - fallthrough; - case RATE_MCS_CHAN_WIDTH_160: - /* content channel 1 */ - IWL_MLD_ENC_EHT_RU(1_2_1, A2); - IWL_MLD_ENC_EHT_RU(1_2_2, C2); - /* content channel 2 */ - IWL_MLD_ENC_EHT_RU(2_2_1, B2); - IWL_MLD_ENC_EHT_RU(2_2_2, D2); - fallthrough; - case RATE_MCS_CHAN_WIDTH_80: - /* content channel 1 */ - IWL_MLD_ENC_EHT_RU(1_1_2, C1); - /* content channel 2 */ - IWL_MLD_ENC_EHT_RU(2_1_2, D1); - fallthrough; - case RATE_MCS_CHAN_WIDTH_40: - /* content channel 2 */ - IWL_MLD_ENC_EHT_RU(2_1_1, B1); - fallthrough; - case RATE_MCS_CHAN_WIDTH_20: - IWL_MLD_ENC_EHT_RU(1_1_1, A1); - break; - } - } else { - __le32 usig_a1 = phy_data->rx_vec[0]; - __le32 usig_a2 = phy_data->rx_vec[1]; - - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a1, - IWL_RX_USIG_A1_DISREGARD, - 
IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a1, - IWL_RX_USIG_A1_VALIDATE, - IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_PPDU_TYPE, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_PUNC_CHANNEL, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B8, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_SIG_MCS, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS); - IWL_MLD_ENC_USIG_VALUE_MASK - (usig, usig_a2, IWL_RX_USIG_A2_EHT_SIG_SYM_NUM, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_CRC_OK, - IEEE80211_RADIOTAP_EHT_USIG2_MU_B16_B19_CRC); - } -} + usig->common |= LE32_DEC_ENC(usig_a1, + OFDM_RX_FRAME_ENHANCED_WIFI_TXOP_DURATION, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); -static void iwl_mld_decode_eht_ext_tb(struct iwl_mld *mld, - struct iwl_mld_rx_phy_data *phy_data, - struct ieee80211_rx_status *rx_status, - struct ieee80211_radiotap_eht *eht, - struct ieee80211_radiotap_eht_usig *usig) -{ - if (phy_data->with_data) { - __le32 data5 = phy_data->data5; - - IWL_MLD_ENC_USIG_VALUE_MASK(usig, data5, - IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, data5, - IWL_RX_PHY_DATA5_EHT_TB_SPATIAL_REUSE1, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1); - - IWL_MLD_ENC_USIG_VALUE_MASK(usig, data5, - IWL_RX_PHY_DATA5_EHT_TB_SPATIAL_REUSE2, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2); - } else { - __le32 usig_a1 = phy_data->rx_vec[0]; - 
__le32 usig_a2 = phy_data->rx_vec[1]; - - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a1, - IWL_RX_USIG_A1_DISREGARD, - IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_PPDU_TYPE, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_1, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_2, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_TRIG_USIG2_DISREGARD, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD); - IWL_MLD_ENC_USIG_VALUE_MASK(usig, usig_a2, - IWL_RX_USIG_A2_EHT_CRC_OK, - IEEE80211_RADIOTAP_EHT_USIG2_TB_B16_B19_CRC); - } + if (!le32_get_bits(usig_a2, OFDM_RX_USIG_CRC_OK)) + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC); + + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN); + usig->common |= LE32_DEC_ENC(usig_a1, + OFDM_RX_FRAME_ENHANCED_WIFI_VER_ID, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER); + + if (he_type == RATE_MCS_HE_TYPE_TRIG) + iwl_mld_decode_eht_usig_tb(phy_data, usig); + else + iwl_mld_decode_eht_usig_non_tb(phy_data, usig); } -static void iwl_mld_decode_eht_ru(struct iwl_mld *mld, - struct ieee80211_rx_status *rx_status, - struct ieee80211_radiotap_eht *eht) +static void +iwl_mld_eht_set_ru_alloc(struct ieee80211_rx_status *rx_status, + u32 ru_with_p80) { - u32 ru = le32_get_bits(eht->data[8], - IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1); enum nl80211_eht_ru_alloc nl_ru; + u32 ru = ru_with_p80 >> 1; - /* Using D1.5 Table 9-53a - Encoding of PS160 and RU Allocation subfields - * in an EHT variant User Info field + /* + * HW always 
uses trigger frame format: + * + * Draft PIEEE802.11be D7.0 Table 9-46l - Encoding of the PS160 and + * RU Allocation subfields in an EHT variant User Info field */ switch (ru) { @@ -929,135 +877,228 @@ static void iwl_mld_decode_eht_ru(struct iwl_mld *mld, rx_status->eht.ru = nl_ru; } -static void iwl_mld_decode_eht_phy_data(struct iwl_mld *mld, - struct iwl_mld_rx_phy_data *phy_data, - struct ieee80211_rx_status *rx_status, - struct ieee80211_radiotap_eht *eht, - struct ieee80211_radiotap_eht_usig *usig) - +static void iwl_mld_decode_eht_tb(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht) { - __le32 data0 = phy_data->data0; - __le32 data1 = phy_data->data1; - __le32 usig_a1 = phy_data->rx_vec[0]; - u8 info_type = phy_data->info_type; - - /* Not in EHT range */ - if (info_type < IWL_RX_PHY_INFO_TYPE_EHT_MU || - info_type > IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT) + if (!(phy_data->ntfy->flags & IWL_SNIF_FLAG_VALID_TB_RX)) return; - usig->common |= cpu_to_le32 - (IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN | - IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN); - if (phy_data->with_data) { - usig->common |= LE32_DEC_ENC(data0, - IWL_RX_PHY_DATA0_EHT_UPLINK, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); - usig->common |= LE32_DEC_ENC(data0, - IWL_RX_PHY_DATA0_EHT_BSS_COLOR_MASK, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR); - } else { - usig->common |= LE32_DEC_ENC(usig_a1, - IWL_RX_USIG_A1_UL_FLAG, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); - usig->common |= LE32_DEC_ENC(usig_a1, - IWL_RX_USIG_A1_BSS_COLOR, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR); - } + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT | + IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM | + IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM | + IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM | + IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF | + IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80); - usig->common |= - 
cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_CHECKED); - usig->common |= - LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_VALIDATE, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK); - - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE); - eht->data[0] |= LE32_DEC_ENC(data0, - IWL_RX_PHY_DATA0_ETH_SPATIAL_REUSE_MASK, - IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE); - - /* All RU allocating size/index is in TB format */ - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT); - eht->data[8] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PS160, + eht->data[8] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx0, + OFDM_UCODE_TRIG_BASE_PS160, IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160); - eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_RU_ALLOC_B0, - IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0); - eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_RU_ALLOC_B1_B7, + eht->data[8] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_RU, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0 | IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_CODING_EXTRA_SYM, + IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_PRE_FEC_PAD_FACTOR, + IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_PE_DISAMBIG, + IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_NUM_OF_LTF_SYM, + IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF); + eht->data[1] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht_tb.tb_rx0, + OFDM_UCODE_TRIG_BASE_RX_RU_P80, + IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80); - iwl_mld_decode_eht_ru(mld, rx_status, eht); - - /* We only get here 
in case of IWL_RX_MPDU_PHY_TSF_OVERLOAD is set - * which is on only in case of monitor mode so no need to check monitor - * mode - */ - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80); - eht->data[1] |= - le32_encode_bits(mld->monitor.p80, - IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80); - - usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN); - if (phy_data->with_data) - usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); - else - usig->common |= LE32_DEC_ENC(usig_a1, IWL_RX_USIG_A1_TXOP_DURATION, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); + iwl_mld_eht_set_ru_alloc(rx_status, + le32_get_bits(phy_data->ntfy->sigs.eht_tb.tb_rx1, + OFDM_UCODE_TRIG_BASE_RX_RU)); +} - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM); - eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_LDPC_EXT_SYM, - IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM); +static void iwl_mld_eht_decode_user_ru(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_radiotap_eht *eht) +{ + u32 phy_bw = phy_data->rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK; - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM); - eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PRE_FEC_PAD_MASK, - IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM); + if (!(phy_data->ntfy->flags & IWL_SNIF_FLAG_VALID_RU)) + return; - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM); - eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PE_DISAMBIG, - IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM); +#define __IWL_MLD_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \ + eht->data[(rt_data)] |= \ + (cpu_to_le32(IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru ## _KNOWN) | \ + LE32_DEC_ENC(phy_data->ntfy->sigs.eht.cmn[fw_data], \ + OFDM_RX_FRAME_EHT_RU_ALLOC_ ## fw_data ## _ ## fw_ru, \ + IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## 
rt_ru)) - /* TODO: what about IWL_RX_PHY_DATA0_EHT_BW320_SLOT */ +#define _IWL_MLD_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \ + __IWL_MLD_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) - if (!le32_get_bits(data0, IWL_RX_PHY_DATA0_EHT_SIGA_CRC_OK)) - usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC); +#define IEEE80211_RADIOTAP_RU_DATA_1_1_1 1 +#define IEEE80211_RADIOTAP_RU_DATA_2_1_1 2 +#define IEEE80211_RADIOTAP_RU_DATA_1_1_2 2 +#define IEEE80211_RADIOTAP_RU_DATA_2_1_2 2 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_1 3 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_1 3 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_2 3 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_2 4 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_3 4 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_3 4 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_4 5 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_4 5 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_5 5 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_5 6 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_6 6 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_6 6 + +#define IWL_RX_RU_DATA_A1 0 +#define IWL_RX_RU_DATA_A2 0 +#define IWL_RX_RU_DATA_A3 0 +#define IWL_RX_RU_DATA_A4 4 +#define IWL_RX_RU_DATA_B1 1 +#define IWL_RX_RU_DATA_B2 1 +#define IWL_RX_RU_DATA_B3 1 +#define IWL_RX_RU_DATA_B4 4 +#define IWL_RX_RU_DATA_C1 2 +#define IWL_RX_RU_DATA_C2 2 +#define IWL_RX_RU_DATA_C3 2 +#define IWL_RX_RU_DATA_C4 5 +#define IWL_RX_RU_DATA_D1 3 +#define IWL_RX_RU_DATA_D2 3 +#define IWL_RX_RU_DATA_D3 3 +#define IWL_RX_RU_DATA_D4 5 - usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN); - usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PHY_VER, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER); +#define IWL_MLD_ENC_EHT_RU(rt_ru, fw_ru) \ + _IWL_MLD_ENC_EHT_RU(IEEE80211_RADIOTAP_RU_DATA_ ## rt_ru, \ + rt_ru, \ + IWL_RX_RU_DATA_ ## fw_ru, \ + fw_ru) /* - * TODO: what about TB - IWL_RX_PHY_DATA1_EHT_TB_PILOT_TYPE, - * IWL_RX_PHY_DATA1_EHT_TB_LOW_SS + * Hardware labels the content channels/RU 
allocation values + * as follows: + * + * Content Channel 1 Content Channel 2 + * 20 MHz: A1 + * 40 MHz: A1 B1 + * 80 MHz: A1 C1 B1 D1 + * 160 MHz: A1 C1 A2 C2 B1 D1 B2 D2 + * 320 MHz: A1 C1 A2 C2 A3 C3 A4 C4 B1 D1 B2 D2 B3 D3 B4 D4 */ - eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF); - eht->data[0] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_SIG_LTF_NUM, + switch (phy_bw) { + case RATE_MCS_CHAN_WIDTH_320: + /* content channel 1 */ + IWL_MLD_ENC_EHT_RU(1_2_3, A3); + IWL_MLD_ENC_EHT_RU(1_2_4, C3); + IWL_MLD_ENC_EHT_RU(1_2_5, A4); + IWL_MLD_ENC_EHT_RU(1_2_6, C4); + /* content channel 2 */ + IWL_MLD_ENC_EHT_RU(2_2_3, B3); + IWL_MLD_ENC_EHT_RU(2_2_4, D3); + IWL_MLD_ENC_EHT_RU(2_2_5, B4); + IWL_MLD_ENC_EHT_RU(2_2_6, D4); + fallthrough; + case RATE_MCS_CHAN_WIDTH_160: + /* content channel 1 */ + IWL_MLD_ENC_EHT_RU(1_2_1, A2); + IWL_MLD_ENC_EHT_RU(1_2_2, C2); + /* content channel 2 */ + IWL_MLD_ENC_EHT_RU(2_2_1, B2); + IWL_MLD_ENC_EHT_RU(2_2_2, D2); + fallthrough; + case RATE_MCS_CHAN_WIDTH_80: + /* content channel 1 */ + IWL_MLD_ENC_EHT_RU(1_1_2, C1); + /* content channel 2 */ + IWL_MLD_ENC_EHT_RU(2_1_2, D1); + fallthrough; + case RATE_MCS_CHAN_WIDTH_40: + /* content channel 2 */ + IWL_MLD_ENC_EHT_RU(2_1_1, B1); + fallthrough; + case RATE_MCS_CHAN_WIDTH_20: + /* content channel 1 */ + IWL_MLD_ENC_EHT_RU(1_1_1, A1); + break; + } +} + +static void iwl_mld_decode_eht_non_tb(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht) +{ + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE | + /* All RU allocating size/index is in TB format */ + IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT | + IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM | + IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM | + IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM | + IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF | + IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80 | + IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M); + + 
eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_SPATIAL_REUSE, + IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE); + eht->data[8] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b2, + OFDM_RX_FRAME_EHT_STA_RU_PS160, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160); + eht->data[8] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b2, + OFDM_RX_FRAME_EHT_STA_RU, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0 | + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_CODING_EXTRA_SYM, + IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_PRE_FEC_PAD_FACTOR, + IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_PE_DISAMBIG, + IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM); + eht->data[0] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_NUM_OF_LTF_SYM, IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF); + eht->data[1] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b2, + OFDM_RX_FRAME_EHT_STA_RU_P80, + IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80); + eht->data[7] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_NUM_OF_USERS, + IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS); + + iwl_mld_eht_decode_user_ru(phy_data, eht); + + iwl_mld_eht_set_ru_alloc(rx_status, + le32_get_bits(phy_data->ntfy->sigs.eht.b2, + OFDM_RX_FRAME_EHT_STA_RU)); +} - if (info_type == IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT || - info_type == IWL_RX_PHY_INFO_TYPE_EHT_TB) - iwl_mld_decode_eht_ext_tb(mld, phy_data, rx_status, eht, usig); +static void iwl_mld_decode_eht_phy_data(struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht) +{ + u32 he_type = phy_data->rate_n_flags & RATE_MCS_HE_TYPE_MSK; - if (info_type == IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT || - info_type == 
IWL_RX_PHY_INFO_TYPE_EHT_MU) - iwl_mld_decode_eht_ext_mu(mld, phy_data, rx_status, eht, usig); + if (he_type == RATE_MCS_HE_TYPE_TRIG) + iwl_mld_decode_eht_tb(phy_data, rx_status, eht); + else + iwl_mld_decode_eht_non_tb(phy_data, rx_status, eht); } static void iwl_mld_rx_eht(struct iwl_mld *mld, struct sk_buff *skb, - struct iwl_mld_rx_phy_data *phy_data, - int queue) + struct iwl_mld_rx_phy_data *phy_data) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_eht *eht; - struct ieee80211_radiotap_eht_usig *usig; size_t eht_len = sizeof(*eht); - u32 rate_n_flags = phy_data->rate_n_flags; u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; /* EHT and HE have the same values for LTF */ u8 ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN; - u16 phy_info = phy_data->phy_info; - u32 bw; /* u32 for 1 user_info */ if (phy_data->with_data) @@ -1065,50 +1106,7 @@ static void iwl_mld_rx_eht(struct iwl_mld *mld, struct sk_buff *skb, eht = iwl_mld_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT, eht_len); - usig = iwl_mld_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT_USIG, - sizeof(*usig)); rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; - usig->common |= - cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN); - - /* specific handling for 320MHz */ - bw = u32_get_bits(rate_n_flags, RATE_MCS_CHAN_WIDTH_MSK); - if (bw == RATE_MCS_CHAN_WIDTH_320_VAL) - bw += le32_get_bits(phy_data->data0, - IWL_RX_PHY_DATA0_EHT_BW320_SLOT); - - usig->common |= cpu_to_le32 - (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW, bw)); - - /* report the AMPDU-EOF bit on single frames */ - if (!queue && !(phy_info & IWL_RX_MPDU_PHY_AMPDU)) { - rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->data0 & - cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } - - /* update aggregation data for monitor sake on default queue */ - if (!queue && (phy_info & 
IWL_RX_MPDU_PHY_TSF_OVERLOAD) && - (phy_info & IWL_RX_MPDU_PHY_AMPDU) && phy_data->first_subframe) { - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->data0 & - cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } - - if (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) - iwl_mld_decode_eht_phy_data(mld, phy_data, rx_status, eht, usig); - -#define CHECK_TYPE(F) \ - BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_ ## F != \ - (RATE_MCS_HE_TYPE_ ## F >> RATE_MCS_HE_TYPE_POS)) - - CHECK_TYPE(SU); - CHECK_TYPE(EXT_SU); - CHECK_TYPE(MU); - CHECK_TYPE(TRIG); switch (u32_get_bits(rate_n_flags, RATE_MCS_HE_GI_LTF_MSK)) { case 0: @@ -1144,20 +1142,18 @@ static void iwl_mld_rx_eht(struct iwl_mld *mld, struct sk_buff *skb, if (ltf != IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN) { eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_GI); - eht->data[0] |= cpu_to_le32 - (FIELD_PREP(IEEE80211_RADIOTAP_EHT_DATA0_LTF, - ltf) | - FIELD_PREP(IEEE80211_RADIOTAP_EHT_DATA0_GI, - rx_status->eht.gi)); + eht->data[0] |= le32_encode_bits(ltf, + IEEE80211_RADIOTAP_EHT_DATA0_LTF) | + le32_encode_bits(rx_status->eht.gi, + IEEE80211_RADIOTAP_EHT_DATA0_GI); } if (!phy_data->with_data) { eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_NSS_S | IEEE80211_RADIOTAP_EHT_KNOWN_BEAMFORMED_S); - eht->data[7] |= - le32_encode_bits(le32_get_bits(phy_data->rx_vec[2], - RX_NO_DATA_RX_VEC2_EHT_NSTS_MSK), - IEEE80211_RADIOTAP_EHT_DATA7_NSS_S); + eht->data[7] |= LE32_DEC_ENC(phy_data->ntfy->sigs.eht.b1, + OFDM_RX_FRAME_EHT_NSTS, + IEEE80211_RADIOTAP_EHT_DATA7_NSS_S); if (rate_n_flags & RATE_MCS_BF_MSK) eht->data[7] |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_DATA7_BEAMFORMED_S); @@ -1177,14 +1173,28 @@ static void iwl_mld_rx_eht(struct iwl_mld *mld, struct sk_buff *skb, eht->user_info[0] |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_CODING); - eht->user_info[0] |= cpu_to_le32 - (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS, - u32_get_bits(rate_n_flags, - 
RATE_VHT_MCS_RATE_CODE_MSK)) | - FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O, - u32_get_bits(rate_n_flags, - RATE_MCS_NSS_MSK))); + eht->user_info[0] |= + le32_encode_bits(u32_get_bits(rate_n_flags, + RATE_VHT_MCS_RATE_CODE_MSK), + IEEE80211_RADIOTAP_EHT_USER_INFO_MCS) | + le32_encode_bits(u32_get_bits(rate_n_flags, + RATE_MCS_NSS_MSK), + IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O); } + + if (likely(!phy_data->ntfy)) + return; + + if (phy_data->with_data) { + eht->user_info[0] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN) | + LE32_DEC_ENC(phy_data->ntfy->sigs.eht.user_id, + OFDM_RX_FRAME_EHT_USER_FIELD_ID, + IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID); + } + + iwl_mld_decode_eht_usig(phy_data, skb); + iwl_mld_decode_eht_phy_data(phy_data, rx_status, eht); } #ifdef CONFIG_IWLWIFI_DEBUGFS @@ -1219,34 +1229,30 @@ static void iwl_mld_add_rtap_sniffer_config(struct iwl_mld *mld, } #endif -/* Note: hdr can be NULL */ -static void iwl_mld_rx_fill_status(struct iwl_mld *mld, int link_id, - struct ieee80211_hdr *hdr, - struct sk_buff *skb, - struct iwl_mld_rx_phy_data *phy_data, - int queue) +static void +iwl_mld_set_rx_nonlegacy_rate_info(u32 rate_n_flags, + struct ieee80211_rx_status *rx_status) { - struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - u32 format = phy_data->rate_n_flags & RATE_MCS_MOD_TYPE_MSK; - u32 rate_n_flags = phy_data->rate_n_flags; u8 stbc = u32_get_bits(rate_n_flags, RATE_MCS_STBC_MSK); - bool is_sgi = rate_n_flags & RATE_MCS_SGI_MSK; - - phy_data->info_type = IWL_RX_PHY_INFO_TYPE_NONE; - if (phy_data->phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) - phy_data->info_type = - le32_get_bits(phy_data->data1, - IWL_RX_PHY_DATA1_INFO_TYPE_MASK); - - /* set the preamble flag if appropriate */ - if (format == RATE_MCS_MOD_TYPE_CCK && - phy_data->phy_info & IWL_RX_MPDU_PHY_SHORT_PREAMBLE) - rx_status->enc_flags |= RX_ENC_FLAG_SHORTPRE; + /* NSS may be overridden by PHY ntfy with full value */ + rx_status->nss = 
u32_get_bits(rate_n_flags, RATE_MCS_NSS_MSK) + 1; + rx_status->rate_idx = rate_n_flags & RATE_MCS_CODE_MSK; + rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT; + if (rate_n_flags & RATE_MCS_LDPC_MSK) + rx_status->enc_flags |= RX_ENC_FLAG_LDPC; +} - iwl_mld_fill_signal(mld, link_id, hdr, rx_status, phy_data); +static void iwl_mld_set_rx_rate(struct iwl_mld *mld, + struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status) +{ + u32 rate_n_flags = phy_data->rate_n_flags; + u8 stbc = u32_get_bits(rate_n_flags, RATE_MCS_STBC_MSK); + u32 format = rate_n_flags & RATE_MCS_MOD_TYPE_MSK; + bool is_sgi = rate_n_flags & RATE_MCS_SGI_MSK; - /* This may be overridden by iwl_mld_rx_he() to HE_RU */ + /* bandwidth may be overridden to RU by PHY ntfy */ switch (rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK) { case RATE_MCS_CHAN_WIDTH_20: break; @@ -1264,66 +1270,12 @@ static void iwl_mld_rx_fill_status(struct iwl_mld *mld, int link_id, break; } - /* must be before L-SIG data */ - if (format == RATE_MCS_MOD_TYPE_HE) - iwl_mld_rx_he(mld, skb, phy_data, queue); - - iwl_mld_decode_lsig(skb, phy_data); - - rx_status->device_timestamp = phy_data->gp2_on_air_rise; - - /* using TLV format and must be after all fixed len fields */ - if (format == RATE_MCS_MOD_TYPE_EHT) - iwl_mld_rx_eht(mld, skb, phy_data, queue); - -#ifdef CONFIG_IWLWIFI_DEBUGFS - if (unlikely(mld->monitor.on)) { - iwl_mld_add_rtap_sniffer_config(mld, skb); - - if (mld->monitor.ptp_time) { - u64 adj_time = - iwl_mld_ptp_get_adj_time(mld, - phy_data->gp2_on_air_rise * - NSEC_PER_USEC); - - rx_status->mactime = div64_u64(adj_time, NSEC_PER_USEC); - rx_status->flag |= RX_FLAG_MACTIME_IS_RTAP_TS64; - rx_status->flag &= ~RX_FLAG_MACTIME; - } - } -#endif - - if (format != RATE_MCS_MOD_TYPE_CCK && is_sgi) - rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; - - if (rate_n_flags & RATE_MCS_LDPC_MSK) - rx_status->enc_flags |= RX_ENC_FLAG_LDPC; - switch (format) { - case RATE_MCS_MOD_TYPE_HT: - rx_status->encoding = 
RX_ENC_HT; - rx_status->rate_idx = RATE_HT_MCS_INDEX(rate_n_flags); - rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT; - break; - case RATE_MCS_MOD_TYPE_VHT: - case RATE_MCS_MOD_TYPE_HE: - case RATE_MCS_MOD_TYPE_EHT: - if (format == RATE_MCS_MOD_TYPE_VHT) { - rx_status->encoding = RX_ENC_VHT; - } else if (format == RATE_MCS_MOD_TYPE_HE) { - rx_status->encoding = RX_ENC_HE; - rx_status->he_dcm = - !!(rate_n_flags & RATE_HE_DUAL_CARRIER_MODE_MSK); - } else if (format == RATE_MCS_MOD_TYPE_EHT) { - rx_status->encoding = RX_ENC_EHT; - } - - rx_status->nss = u32_get_bits(rate_n_flags, - RATE_MCS_NSS_MSK) + 1; - rx_status->rate_idx = rate_n_flags & RATE_MCS_CODE_MSK; - rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT; - break; - default: { + case RATE_MCS_MOD_TYPE_CCK: + if (phy_data->phy_info & IWL_RX_MPDU_PHY_SHORT_PREAMBLE) + rx_status->enc_flags |= RX_ENC_FLAG_SHORTPRE; + fallthrough; + case RATE_MCS_MOD_TYPE_LEGACY_OFDM: { int rate = iwl_mld_legacy_hw_idx_to_mac80211_idx(rate_n_flags, rx_status->band); @@ -1337,12 +1289,88 @@ static void iwl_mld_rx_fill_status(struct iwl_mld *mld, int link_id, /* invalid rate */ rx_status->rate_idx = 0; - if (net_ratelimit()) + /* + * In monitor mode we can see CCK frames on 5 or 6 GHz, usually + * just the (possibly malformed) PHY header by accident, since + * the decoder doesn't seem to turn off CCK. We cannot correctly + * encode the rate to mac80211 (and therefore not in radiotap) + * since we give the per-band index which doesn't cover those + * rates. 
+ */ + if (!mld->monitor.on && net_ratelimit()) IWL_ERR(mld, "invalid rate_n_flags=0x%x, band=%d\n", rate_n_flags, rx_status->band); break; } + case RATE_MCS_MOD_TYPE_HT: + rx_status->encoding = RX_ENC_HT; + rx_status->rate_idx = RATE_HT_MCS_INDEX(rate_n_flags); + rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT; + break; + case RATE_MCS_MOD_TYPE_VHT: + rx_status->encoding = RX_ENC_VHT; + iwl_mld_set_rx_nonlegacy_rate_info(rate_n_flags, rx_status); + break; + case RATE_MCS_MOD_TYPE_HE: + rx_status->encoding = RX_ENC_HE; + rx_status->he_dcm = + !!(rate_n_flags & RATE_HE_DUAL_CARRIER_MODE_MSK); + iwl_mld_set_rx_nonlegacy_rate_info(rate_n_flags, rx_status); + break; + case RATE_MCS_MOD_TYPE_EHT: + rx_status->encoding = RX_ENC_EHT; + iwl_mld_set_rx_nonlegacy_rate_info(rate_n_flags, rx_status); + break; + default: + WARN_ON_ONCE(1); + } + + if (format != RATE_MCS_MOD_TYPE_CCK && is_sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; +} + +/* Note: hdr can be NULL */ +static void iwl_mld_rx_fill_status(struct iwl_mld *mld, int link_id, + struct ieee80211_hdr *hdr, + struct sk_buff *skb, + struct iwl_mld_rx_phy_data *phy_data) +{ + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + u32 rate_n_flags = phy_data->rate_n_flags; + u32 format = rate_n_flags & RATE_MCS_MOD_TYPE_MSK; + + iwl_mld_fill_signal(mld, link_id, hdr, rx_status, phy_data); + + rx_status->device_timestamp = phy_data->gp2_on_air_rise; + + iwl_mld_set_rx_rate(mld, phy_data, rx_status); + + /* must be before L-SIG data (radiotap field order) */ + if (format == RATE_MCS_MOD_TYPE_HE) + iwl_mld_rx_he(skb, phy_data); + + iwl_mld_decode_lsig(skb, phy_data); + + /* TLVs - must be after radiotap fixed fields */ + if (format == RATE_MCS_MOD_TYPE_EHT) + iwl_mld_rx_eht(mld, skb, phy_data); + +#ifdef CONFIG_IWLWIFI_DEBUGFS + if (unlikely(mld->monitor.on)) { + iwl_mld_add_rtap_sniffer_config(mld, skb); + + if (mld->monitor.ptp_time) { + u64 adj_time = + iwl_mld_ptp_get_adj_time(mld, + 
phy_data->gp2_on_air_rise * + NSEC_PER_USEC); + + rx_status->mactime = div64_u64(adj_time, NSEC_PER_USEC); + rx_status->flag |= RX_FLAG_MACTIME_IS_RTAP_TS64; + rx_status->flag &= ~RX_FLAG_MACTIME; + } } +#endif } /* iwl_mld_create_skb adds the rxb to a new skb */ @@ -1763,13 +1791,36 @@ static int iwl_mld_rx_crypto(struct iwl_mld *mld, return 0; } -static void iwl_mld_rx_update_ampdu_ref(struct iwl_mld *mld, - struct iwl_mld_rx_phy_data *phy_data, - struct ieee80211_rx_status *rx_status) +static void iwl_mld_rx_update_ampdu_data(struct iwl_mld *mld, + struct iwl_mld_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status) { + u32 format = phy_data->rate_n_flags & RATE_MCS_MOD_TYPE_MSK; bool toggle_bit = phy_data->phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; + switch (format) { + case RATE_MCS_MOD_TYPE_CCK: + case RATE_MCS_MOD_TYPE_LEGACY_OFDM: + /* no aggregation possible */ + return; + case RATE_MCS_MOD_TYPE_HT: + case RATE_MCS_MOD_TYPE_VHT: + /* single frames are not A-MPDU format */ + if (!(phy_data->phy_info & IWL_RX_MPDU_PHY_AMPDU)) + return; + break; + default: + /* HE/EHT/UHR have A-MPDU format for single frames */ + if (!(phy_data->phy_info & IWL_RX_MPDU_PHY_AMPDU)) { + rx_status->flag |= RX_FLAG_AMPDU_DETAILS; + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->phy_info & IWL_RX_MPDU_PHY_EOF_INDICATION) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; + return; + } + } + rx_status->flag |= RX_FLAG_AMPDU_DETAILS; /* Toggle is switched whenever new aggregation starts. 
Make * sure ampdu_reference is never 0 so we can later use it to @@ -1781,6 +1832,11 @@ static void iwl_mld_rx_update_ampdu_ref(struct iwl_mld *mld, mld->monitor.ampdu_ref++; mld->monitor.ampdu_toggle = toggle_bit; phy_data->first_subframe = true; + + /* report EOF bit on the first subframe */ + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->phy_info & IWL_RX_MPDU_PHY_EOF_INDICATION) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; } rx_status->ampdu_reference = mld->monitor.ampdu_ref; } @@ -1824,6 +1880,8 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, "FW lied about packet len (%d)\n", pkt_len)) return; + iwl_mld_fill_phy_data_from_mpdu(mld, mpdu_desc, &phy_data); + /* Don't use dev_alloc_skb(), we'll have enough headroom once * ieee80211_hdr pulled. */ @@ -1835,8 +1893,6 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, hdr = (void *)(pkt->data + mpdu_desc_size); - iwl_mld_fill_phy_data(mld, mpdu_desc, &phy_data); - if (mpdu_desc->mac_flags2 & IWL_RX_MPDU_MFLG2_PAD) { /* If the device inserted padding it means that (it thought) * the 802.11 header wasn't a multiple of 4 bytes long. In @@ -1861,9 +1917,8 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, if (drop) goto drop; - /* update aggregation data for monitor sake on default queue */ - if (!queue && (phy_data.phy_info & IWL_RX_MPDU_PHY_AMPDU)) - iwl_mld_rx_update_ampdu_ref(mld, &phy_data, rx_status); + if (unlikely(mld->monitor.on)) + iwl_mld_rx_update_ampdu_data(mld, &phy_data, rx_status); /* Keep packets with CRC errors (and with overrun) for monitor mode * (otherwise the firmware discards them) but mark them as bad. 
@@ -1897,7 +1952,7 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, link_id = u8_get_bits(mpdu_desc->mac_phy_band, IWL_RX_MPDU_MAC_PHY_BAND_LINK_MASK); - iwl_mld_rx_fill_status(mld, link_id, hdr, skb, &phy_data, queue); + iwl_mld_rx_fill_status(mld, link_id, hdr, skb, &phy_data); if (iwl_mld_rx_crypto(mld, sta, hdr, rx_status, mpdu_desc, queue, le32_to_cpu(pkt->len_n_flags), &crypto_len)) @@ -2031,87 +2086,64 @@ void iwl_mld_handle_rx_queues_sync_notif(struct iwl_mld *mld, wake_up(&mld->rxq_sync.waitq); } -void iwl_mld_rx_monitor_no_data(struct iwl_mld *mld, struct napi_struct *napi, - struct iwl_rx_packet *pkt, int queue) +static void iwl_mld_no_data_rx(struct iwl_mld *mld, + struct napi_struct *napi, + struct iwl_rx_phy_air_sniffer_ntfy *ntfy) { - struct iwl_rx_no_data_ver_3 *desc; - struct iwl_mld_rx_phy_data phy_data; struct ieee80211_rx_status *rx_status; + struct iwl_mld_rx_phy_data phy_data = { + .ntfy = ntfy, + .phy_info = 0, /* short preamble set below */ + .rate_n_flags = le32_to_cpu(ntfy->rate), + .gp2_on_air_rise = le32_to_cpu(ntfy->on_air_rise_time), + .energy_a = ntfy->rssi_a, + .energy_b = ntfy->rssi_b, + }; + u32 format = phy_data.rate_n_flags & RATE_MCS_MOD_TYPE_MSK; struct sk_buff *skb; - u32 format, rssi; - u8 channel; - if (unlikely(mld->fw_status.in_hw_restart)) - return; - - if (IWL_FW_CHECK(mld, iwl_rx_packet_payload_len(pkt) < sizeof(*desc), - "Bad RX_NO_DATA_NOTIF size (%d)\n", - iwl_rx_packet_payload_len(pkt))) - return; - - desc = (void *)pkt->data; - - rssi = le32_to_cpu(desc->rssi); - channel = u32_get_bits(rssi, RX_NO_DATA_CHANNEL_MSK); - - phy_data.energy_a = u32_get_bits(rssi, RX_NO_DATA_CHAIN_A_MSK); - phy_data.energy_b = u32_get_bits(rssi, RX_NO_DATA_CHAIN_B_MSK); - phy_data.data0 = desc->phy_info[0]; - phy_data.data1 = desc->phy_info[1]; - phy_data.phy_info = IWL_RX_MPDU_PHY_TSF_OVERLOAD; - phy_data.gp2_on_air_rise = le32_to_cpu(desc->on_air_rise_time); - phy_data.rate_n_flags = 
iwl_v3_rate_from_v2_v3(desc->rate, - mld->fw_rates_ver_3); - phy_data.with_data = false; - - BUILD_BUG_ON(sizeof(phy_data.rx_vec) != sizeof(desc->rx_vec)); - memcpy(phy_data.rx_vec, desc->rx_vec, sizeof(phy_data.rx_vec)); - - format = phy_data.rate_n_flags & RATE_MCS_MOD_TYPE_MSK; - - /* Don't use dev_alloc_skb(), we'll have enough headroom once - * ieee80211_hdr pulled. - */ skb = alloc_skb(128, GFP_ATOMIC); - if (!skb) { - IWL_ERR(mld, "alloc_skb failed\n"); + if (!skb) return; - } rx_status = IEEE80211_SKB_RXCB(skb); /* 0-length PSDU */ rx_status->flag |= RX_FLAG_NO_PSDU; - /* mark as failed PLCP on any errors to skip checks in mac80211 */ - if (le32_get_bits(desc->info, RX_NO_DATA_INFO_ERR_MSK) != - RX_NO_DATA_INFO_ERR_NONE) - rx_status->flag |= RX_FLAG_FAILED_PLCP_CRC; - - switch (le32_get_bits(desc->info, RX_NO_DATA_INFO_TYPE_MSK)) { - case RX_NO_DATA_INFO_TYPE_NDP: + switch (ntfy->status) { + case IWL_SNIF_STAT_PLCP_RX_OK: + /* we only get here with sounding PPDUs */ rx_status->zero_length_psdu_type = IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING; break; - case RX_NO_DATA_INFO_TYPE_MU_UNMATCHED: - case RX_NO_DATA_INFO_TYPE_TB_UNMATCHED: + case IWL_SNIF_STAT_AID_NOT_FOR_US: rx_status->zero_length_psdu_type = IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED; break; + case IWL_SNIF_STAT_PLCP_RX_LSIG_ERR: + case IWL_SNIF_STAT_PLCP_RX_SIGA_ERR: + case IWL_SNIF_STAT_PLCP_RX_SIGB_ERR: + case IWL_SNIF_STAT_UNKNOWN_ERROR: default: + rx_status->flag |= RX_FLAG_FAILED_PLCP_CRC; + fallthrough; + case IWL_SNIF_STAT_UNEXPECTED_TB: + case IWL_SNIF_STAT_UNSUPPORTED_RATE: rx_status->zero_length_psdu_type = IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR; - break; + /* we could include the real reason in a vendor TLV */ } - rx_status->band = channel > 14 ? 
NL80211_BAND_5GHZ : - NL80211_BAND_2GHZ; + if (format == RATE_MCS_MOD_TYPE_CCK && + ntfy->legacy_sig.cck & cpu_to_le32(CCK_CRFR_SHORT_PREAMBLE)) + phy_data.phy_info |= IWL_RX_MPDU_PHY_SHORT_PREAMBLE; - rx_status->freq = ieee80211_channel_to_frequency(channel, - rx_status->band); + iwl_mld_fill_rx_status_band_freq(IEEE80211_SKB_RXCB(skb), + ntfy->band, ntfy->channel); /* link ID is ignored for NULL header */ - iwl_mld_rx_fill_status(mld, -1, NULL, skb, &phy_data, queue); + iwl_mld_rx_fill_status(mld, -1, NULL, skb, &phy_data); /* No more radiotap info should be added after this point. * Mark it as mac header for upper layers to know where @@ -2119,29 +2151,72 @@ void iwl_mld_rx_monitor_no_data(struct iwl_mld *mld, struct napi_struct *napi, */ skb_set_mac_header(skb, skb->len); - /* Override the nss from the rx_vec since the rate_n_flags has - * only 1 bit for the nss which gives a max of 2 ss but there - * may be up to 8 spatial streams. - */ - switch (format) { + /* pass the packet to mac80211 */ + rcu_read_lock(); + ieee80211_rx_napi(mld->hw, NULL, skb, napi); + rcu_read_unlock(); +} + +void iwl_mld_handle_phy_air_sniffer_notif(struct iwl_mld *mld, + struct napi_struct *napi, + struct iwl_rx_packet *pkt) +{ + struct iwl_rx_phy_air_sniffer_ntfy *ntfy = (void *)pkt->data; + bool is_ndp = false; + u32 he_type; + + if (IWL_FW_CHECK(mld, iwl_rx_packet_payload_len(pkt) < sizeof(*ntfy), + "invalid air sniffer notification size\n")) + return; + + /* check if there's an old one to release as errored */ + if (mld->monitor.phy.valid && !mld->monitor.phy.used) { + /* didn't capture data, so override status */ + mld->monitor.phy.data.status = IWL_SNIF_STAT_AID_NOT_FOR_US; + iwl_mld_no_data_rx(mld, napi, &mld->monitor.phy.data); + } + + /* old data is no longer valid now */ + mld->monitor.phy.valid = false; + + he_type = le32_to_cpu(ntfy->rate) & RATE_MCS_HE_TYPE_MSK; + + switch (le32_to_cpu(ntfy->rate) & RATE_MCS_MOD_TYPE_MSK) { + case RATE_MCS_MOD_TYPE_HT: + is_ndp = 
!le32_get_bits(ntfy->sigs.ht.a1, + OFDM_RX_FRAME_HT_LENGTH); + break; case RATE_MCS_MOD_TYPE_VHT: - rx_status->nss = - le32_get_bits(desc->rx_vec[0], - RX_NO_DATA_RX_VEC0_VHT_NSTS_MSK) + 1; + is_ndp = le32_get_bits(ntfy->sigs.vht.a0, + OFDM_RX_FRAME_VHT_NUM_OF_DATA_SYM_VALID) && + !le32_get_bits(ntfy->sigs.vht.a0, + OFDM_RX_FRAME_VHT_NUM_OF_DATA_SYM); break; case RATE_MCS_MOD_TYPE_HE: - rx_status->nss = - le32_get_bits(desc->rx_vec[0], - RX_NO_DATA_RX_VEC0_HE_NSTS_MSK) + 1; + if (he_type == RATE_MCS_HE_TYPE_TRIG) + break; + is_ndp = le32_get_bits(ntfy->sigs.he.a3, + OFDM_RX_FRAME_HE_NUM_OF_DATA_SYM_VALID) && + !le32_get_bits(ntfy->sigs.he.a3, + OFDM_RX_FRAME_HE_NUM_OF_DATA_SYM); break; case RATE_MCS_MOD_TYPE_EHT: - rx_status->nss = - le32_get_bits(desc->rx_vec[2], - RX_NO_DATA_RX_VEC2_EHT_NSTS_MSK) + 1; + if (he_type == RATE_MCS_HE_TYPE_TRIG) + break; + is_ndp = le32_get_bits(ntfy->sigs.eht.sig2, + OFDM_RX_FRAME_EHT_NUM_OF_DATA_SYM_VALID) && + !le32_get_bits(ntfy->sigs.eht.sig2, + OFDM_RX_FRAME_EHT_NUM_OF_DATA_SYM); + break; } - /* pass the packet to mac80211 */ - rcu_read_lock(); - ieee80211_rx_napi(mld->hw, NULL, skb, napi); - rcu_read_unlock(); + if (ntfy->status != IWL_SNIF_STAT_PLCP_RX_OK || is_ndp) { + iwl_mld_no_data_rx(mld, napi, ntfy); + return; + } + + /* hang on to it for the RX_MPDU data packet(s) */ + mld->monitor.phy.data = *ntfy; + mld->monitor.phy.valid = true; + mld->monitor.phy.used = false; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.h b/drivers/net/wireless/intel/iwlwifi/mld/rx.h index 2beabd7e70b15..09dddbd40f553 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.h @@ -66,7 +66,8 @@ void iwl_mld_pass_packet_to_mac80211(struct iwl_mld *mld, struct sk_buff *skb, int queue, struct ieee80211_sta *sta); -void iwl_mld_rx_monitor_no_data(struct iwl_mld *mld, struct napi_struct *napi, - struct iwl_rx_packet *pkt, int queue); +void iwl_mld_handle_phy_air_sniffer_notif(struct iwl_mld 
*mld, + struct napi_struct *napi, + struct iwl_rx_packet *pkt); #endif /* __iwl_mld_agg_h__ */ From 38f79506195c32f4f1d1fe5808e7294cd1f298ad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:03 +0300 Subject: [PATCH 317/867] wifi: iwlwifi: mld: include raw PHY notification in radiotap This is useful for debugging and can also be used to see anything that isn't encoded in radiotap (yet.) Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.5fe26e9061f2.Iefb45e3a6a2a62ff3247db4de3777059d390af95@changeid --- drivers/net/wireless/intel/iwlwifi/mld/rx.c | 45 +++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index 052a19bb85b45..19e78d2fbf9b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -1217,8 +1217,9 @@ static void iwl_mld_add_rtap_sniffer_config(struct iwl_mld *mld, radiotap->oui[0] = 0xf6; radiotap->oui[1] = 0x54; radiotap->oui[2] = 0x25; - /* radiotap sniffer config sub-namespace */ + /* Intel OUI default radiotap subtype */ radiotap->oui_subtype = 1; + /* Sniffer config element type */ radiotap->vendor_type = 0; /* fill the data now */ @@ -1229,6 +1230,34 @@ static void iwl_mld_add_rtap_sniffer_config(struct iwl_mld *mld, } #endif +static void iwl_mld_add_rtap_sniffer_phy_data(struct iwl_mld *mld, + struct sk_buff *skb, + struct iwl_rx_phy_air_sniffer_ntfy *ntfy) +{ + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_radiotap_vendor_content *radiotap; + const u16 vendor_data_len = sizeof(*ntfy); + + radiotap = + iwl_mld_radiotap_put_tlv(skb, + IEEE80211_RADIOTAP_VENDOR_NAMESPACE, + sizeof(*radiotap) + vendor_data_len); + + /* Intel OUI */ + radiotap->oui[0] = 0xf6; + radiotap->oui[1] = 0x54; + radiotap->oui[2] = 0x25; + /* Intel OUI default radiotap subtype */ + 
radiotap->oui_subtype = 1; + /* PHY data element type */ + radiotap->vendor_type = cpu_to_le16(1); + + /* fill the data now */ + memcpy(radiotap->data, ntfy, vendor_data_len); + + rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; +} + static void iwl_mld_set_rx_nonlegacy_rate_info(u32 rate_n_flags, struct ieee80211_rx_status *rx_status) @@ -1371,6 +1400,9 @@ static void iwl_mld_rx_fill_status(struct iwl_mld *mld, int link_id, } } #endif + + if (phy_data->ntfy) + iwl_mld_add_rtap_sniffer_phy_data(mld, skb, phy_data->ntfy); } /* iwl_mld_create_skb adds the rxb to a new skb */ @@ -1866,6 +1898,7 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, u32 mpdu_len; enum iwl_mld_reorder_result reorder_res; struct ieee80211_rx_status *rx_status; + unsigned int alloc_size = 128; if (unlikely(mld->fw_status.in_hw_restart)) return; @@ -1884,8 +1917,13 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, /* Don't use dev_alloc_skb(), we'll have enough headroom once * ieee80211_hdr pulled. + * + * For monitor mode we need more space to include the full PHY + * notification data. */ - skb = alloc_skb(128, GFP_ATOMIC); + if (unlikely(mld->monitor.on) && phy_data.ntfy) + alloc_size += sizeof(struct iwl_rx_phy_air_sniffer_ntfy); + skb = alloc_skb(alloc_size, GFP_ATOMIC); if (!skb) { IWL_ERR(mld, "alloc_skb failed\n"); return; @@ -2102,7 +2140,8 @@ static void iwl_mld_no_data_rx(struct iwl_mld *mld, u32 format = phy_data.rate_n_flags & RATE_MCS_MOD_TYPE_MSK; struct sk_buff *skb; - skb = alloc_skb(128, GFP_ATOMIC); + skb = alloc_skb(128 + sizeof(struct iwl_rx_phy_air_sniffer_ntfy), + GFP_ATOMIC); if (!skb) return; From 7f59fadbcbbc7881e434544531a1d6c04036f034 Mon Sep 17 00:00:00 2001 From: Nidhish A N Date: Sun, 19 Oct 2025 11:45:04 +0300 Subject: [PATCH 318/867] wifi: iwlwifi: fw: remove support of several iwl_lari_config_change_cmd versions We only need versions 1, 6, 8 and 12. Remove versions 2, 3, 4, 5, 7, 9, 10, 11. 
Signed-off-by: Nidhish A N Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.5c3de58594e8.I6e6f8707eb66a2b540fb19303c06393f13e1f68e@changeid --- .../wireless/intel/iwlwifi/fw/api/nvm-reg.h | 134 +----------------- .../wireless/intel/iwlwifi/fw/regulatory.c | 26 +--- 2 files changed, 11 insertions(+), 149 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h index e90f3187e55c4..4644fc1aa1ec9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h @@ -18,13 +18,8 @@ enum iwl_regulatory_and_nvm_subcmd_ids { /** * @LARI_CONFIG_CHANGE: &struct iwl_lari_config_change_cmd_v1, - * &struct iwl_lari_config_change_cmd_v2, - * &struct iwl_lari_config_change_cmd_v3, - * &struct iwl_lari_config_change_cmd_v4, - * &struct iwl_lari_config_change_cmd_v5, * &struct iwl_lari_config_change_cmd_v6, - * &struct iwl_lari_config_change_cmd_v7, - * &struct iwl_lari_config_change_cmd_v10 or + * &struct iwl_lari_config_change_cmd_v8, * &struct iwl_lari_config_change_cmd */ LARI_CONFIG_CHANGE = 0x1, @@ -564,74 +559,6 @@ struct iwl_lari_config_change_cmd_v1 { __le32 config_bitmap; } __packed; /* LARI_CHANGE_CONF_CMD_S_VER_1 */ -/** - * struct iwl_lari_config_change_cmd_v2 - change LARI configuration - * @config_bitmap: bit map of the config commands. each bit will trigger a - * different predefined FW config operation - * @oem_uhb_allow_bitmap: bitmap of UHB enabled MCC sets - */ -struct iwl_lari_config_change_cmd_v2 { - __le32 config_bitmap; - __le32 oem_uhb_allow_bitmap; -} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_2 */ - -/** - * struct iwl_lari_config_change_cmd_v3 - change LARI configuration - * @config_bitmap: bit map of the config commands. 
each bit will trigger a - * different predefined FW config operation - * @oem_uhb_allow_bitmap: bitmap of UHB enabled MCC sets - * @oem_11ax_allow_bitmap: bitmap of 11ax allowed MCCs. - * For each supported country, a pair of regulatory override bit and 11ax mode exist - * in the bit field. - */ -struct iwl_lari_config_change_cmd_v3 { - __le32 config_bitmap; - __le32 oem_uhb_allow_bitmap; - __le32 oem_11ax_allow_bitmap; -} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_3 */ - -/** - * struct iwl_lari_config_change_cmd_v4 - change LARI configuration - * @config_bitmap: Bitmap of the config commands. Each bit will trigger a - * different predefined FW config operation. - * @oem_uhb_allow_bitmap: Bitmap of UHB enabled MCC sets. - * @oem_11ax_allow_bitmap: Bitmap of 11ax allowed MCCs. There are two bits - * per country, one to indicate whether to override and the other to - * indicate the value to use. - * @oem_unii4_allow_bitmap: Bitmap of unii4 allowed MCCs.There are two bits - * per country, one to indicate whether to override and the other to - * indicate allow/disallow unii4 channels. - */ -struct iwl_lari_config_change_cmd_v4 { - __le32 config_bitmap; - __le32 oem_uhb_allow_bitmap; - __le32 oem_11ax_allow_bitmap; - __le32 oem_unii4_allow_bitmap; -} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_4 */ - -/** - * struct iwl_lari_config_change_cmd_v5 - change LARI configuration - * @config_bitmap: Bitmap of the config commands. Each bit will trigger a - * different predefined FW config operation. - * @oem_uhb_allow_bitmap: Bitmap of UHB enabled MCC sets. - * @oem_11ax_allow_bitmap: Bitmap of 11ax allowed MCCs. There are two bits - * per country, one to indicate whether to override and the other to - * indicate the value to use. - * @oem_unii4_allow_bitmap: Bitmap of unii4 allowed MCCs.There are two bits - * per country, one to indicate whether to override and the other to - * indicate allow/disallow unii4 channels. 
- * @chan_state_active_bitmap: Bitmap for overriding channel state to active. - * Each bit represents a country or region to activate, according to the BIOS - * definitions. - */ -struct iwl_lari_config_change_cmd_v5 { - __le32 config_bitmap; - __le32 oem_uhb_allow_bitmap; - __le32 oem_11ax_allow_bitmap; - __le32 oem_unii4_allow_bitmap; - __le32 chan_state_active_bitmap; -} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_5 */ - /** * struct iwl_lari_config_change_cmd_v6 - change LARI configuration * @config_bitmap: Bitmap of the config commands. Each bit will trigger a @@ -659,8 +586,7 @@ struct iwl_lari_config_change_cmd_v6 { } __packed; /* LARI_CHANGE_CONF_CMD_S_VER_6 */ /** - * struct iwl_lari_config_change_cmd_v7 - change LARI configuration - * This structure is used also for lari cmd version 8 and 9. + * struct iwl_lari_config_change_cmd_v8 - change LARI configuration * @config_bitmap: Bitmap of the config commands. Each bit will trigger a * different predefined FW config operation. * @oem_uhb_allow_bitmap: Bitmap of UHB enabled MCC sets. @@ -670,21 +596,19 @@ struct iwl_lari_config_change_cmd_v6 { * @oem_unii4_allow_bitmap: Bitmap of unii4 allowed MCCs.There are two bits * per country, one to indicate whether to override and the other to * indicate allow/disallow unii4 channels. - * For LARI cmd version 4 to 8 - bits 0:3 are supported. - * For LARI cmd version 9 - bits 0:5 are supported. + * bit 0 - 3: supported. * @chan_state_active_bitmap: Bitmap to enable different bands per country * or region. * Each bit represents a country or region, and a band to activate * according to the BIOS definitions. - * For LARI cmd version 7 - bits 0:3 are supported. - * For LARI cmd version 8 - bits 0:4 are supported. + * bit 0 - 4: supported. * @force_disable_channels_bitmap: Bitmap of disabled bands/channels. * Each bit represents a set of channels in a specific band that should be * disabled * @edt_bitmap: Bitmap of energy detection threshold table. 
* Disable/enable the EDT optimization method for different band. */ -struct iwl_lari_config_change_cmd_v7 { +struct iwl_lari_config_change_cmd_v8 { __le32 config_bitmap; __le32 oem_uhb_allow_bitmap; __le32 oem_11ax_allow_bitmap; @@ -693,48 +617,8 @@ struct iwl_lari_config_change_cmd_v7 { __le32 force_disable_channels_bitmap; __le32 edt_bitmap; } __packed; -/* LARI_CHANGE_CONF_CMD_S_VER_7 */ /* LARI_CHANGE_CONF_CMD_S_VER_8 */ -/* LARI_CHANGE_CONF_CMD_S_VER_9 */ -/** - * struct iwl_lari_config_change_cmd_v10 - change LARI configuration - * @config_bitmap: Bitmap of the config commands. Each bit will trigger a - * different predefined FW config operation. - * @oem_uhb_allow_bitmap: Bitmap of UHB enabled MCC sets. - * @oem_11ax_allow_bitmap: Bitmap of 11ax allowed MCCs. There are two bits - * per country, one to indicate whether to override and the other to - * indicate the value to use. - * @oem_unii4_allow_bitmap: Bitmap of unii4 allowed MCCs.There are two bits - * per country, one to indicate whether to override and the other to - * indicate allow/disallow unii4 channels. - * For LARI cmd version 10 - bits 0:5 are supported. - * @chan_state_active_bitmap: Bitmap to enable different bands per country - * or region. - * Each bit represents a country or region, and a band to activate - * according to the BIOS definitions. - * For LARI cmd version 10 - bits 0:4 are supported. - * @force_disable_channels_bitmap: Bitmap of disabled bands/channels. - * Each bit represents a set of channels in a specific band that should be - * disabled - * @edt_bitmap: Bitmap of energy detection threshold table. - * Disable/enable the EDT optimization method for different band. - * @oem_320mhz_allow_bitmap: 320Mhz bandwidth enablement bitmap per MCC. - * bit0: enable 320Mhz in Japan. - * bit1: enable 320Mhz in South Korea. - * bit 2 - 31: reserved. 
- */ -struct iwl_lari_config_change_cmd_v10 { - __le32 config_bitmap; - __le32 oem_uhb_allow_bitmap; - __le32 oem_11ax_allow_bitmap; - __le32 oem_unii4_allow_bitmap; - __le32 chan_state_active_bitmap; - __le32 force_disable_channels_bitmap; - __le32 edt_bitmap; - __le32 oem_320mhz_allow_bitmap; -} __packed; -/* LARI_CHANGE_CONF_CMD_S_VER_10 */ /** * struct iwl_lari_config_change_cmd - change LARI configuration @@ -747,14 +631,11 @@ struct iwl_lari_config_change_cmd_v10 { * @oem_unii4_allow_bitmap: Bitmap of unii4 allowed MCCs.There are two bits * per country, one to indicate whether to override and the other to * indicate allow/disallow unii4 channels. - * For LARI cmd version 11 - bits 0:5 are supported. * @chan_state_active_bitmap: Bitmap to enable different bands per country * or region. * Each bit represents a country or region, and a band to activate * according to the BIOS definitions. - * For LARI cmd version 11 - bits 0:4 are supported. - * For LARI cmd version 12 - bits 0:6 are supported and bits 7:31 are - * reserved. + * bit 0 - 6: supported. * @force_disable_channels_bitmap: Bitmap of disabled bands/channels. 
* Each bit represents a set of channels in a specific band that should be * disabled @@ -781,12 +662,11 @@ struct iwl_lari_config_change_cmd { __le32 oem_320mhz_allow_bitmap; __le32 oem_11be_allow_bitmap; } __packed; -/* LARI_CHANGE_CONF_CMD_S_VER_11 */ /* LARI_CHANGE_CONF_CMD_S_VER_12 */ /* Activate UNII-1 (5.2GHz) for World Wide */ #define ACTIVATE_5G2_IN_WW_MASK BIT(4) -#define CHAN_STATE_ACTIVE_BITMAP_CMD_V11 0x1F +#define CHAN_STATE_ACTIVE_BITMAP_CMD_V8 0x1F #define CHAN_STATE_ACTIVE_BITMAP_CMD_V12 0x7F /** diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c index e1f28b0532530..d2ad169ae880f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c @@ -543,32 +543,14 @@ static size_t iwl_get_lari_config_cmd_size(u8 cmd_ver) switch (cmd_ver) { case 12: - case 11: cmd_size = sizeof(struct iwl_lari_config_change_cmd); break; - case 10: - cmd_size = sizeof(struct iwl_lari_config_change_cmd_v10); - break; - case 9: case 8: - case 7: - cmd_size = sizeof(struct iwl_lari_config_change_cmd_v7); + cmd_size = sizeof(struct iwl_lari_config_change_cmd_v8); break; case 6: cmd_size = sizeof(struct iwl_lari_config_change_cmd_v6); break; - case 5: - cmd_size = sizeof(struct iwl_lari_config_change_cmd_v5); - break; - case 4: - cmd_size = sizeof(struct iwl_lari_config_change_cmd_v4); - break; - case 3: - cmd_size = sizeof(struct iwl_lari_config_change_cmd_v3); - break; - case 2: - cmd_size = sizeof(struct iwl_lari_config_change_cmd_v2); - break; default: cmd_size = sizeof(struct iwl_lari_config_change_cmd_v1); break; @@ -609,11 +591,11 @@ int iwl_fill_lari_config(struct iwl_fw_runtime *fwrt, if (!has_raw_dsm_capa) value &= DSM_UNII4_ALLOW_BITMAP; - /* Since version 9, bits 4 and 5 are supported + /* Since version 12, bits 4 and 5 are supported * regardless of this capability, By pass this masking * if firmware has capability of accepting raw DSM table. 
*/ - if (!has_raw_dsm_capa && cmd_ver < 9 && + if (!has_raw_dsm_capa && cmd_ver < 12 && !fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_BIOS_OVERRIDE_5G9_FOR_CA)) value &= ~(DSM_VALUE_UNII4_CANADA_OVERRIDE_MSK | @@ -637,7 +619,7 @@ int iwl_fill_lari_config(struct iwl_fw_runtime *fwrt, if (!has_raw_dsm_capa && cmd_ver < 12 && !fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_BIOS_OVERRIDE_UNII4_US_CA)) - value &= CHAN_STATE_ACTIVE_BITMAP_CMD_V11; + value &= CHAN_STATE_ACTIVE_BITMAP_CMD_V8; cmd->chan_state_active_bitmap = cpu_to_le32(value); } From 8377e92a3a08304b06e4a5cef89d49ab93728dc1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Oct 2025 11:45:05 +0300 Subject: [PATCH 319/867] wifi: iwlwifi: be more chatty when we fail to find a wifi7 device All wifi7 devices need CONFIG_IWLMLD to be enabled. If we can't support the wifi7 device and the module is not enabled, complain to the user. The check in iwl_req_fw_callback is then no longer required. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.071dae9a5de2.I1603085bac5a796442faa75982f8675647becfec@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 9 +-------- drivers/net/wireless/intel/iwlwifi/iwl-drv.h | 6 ++++++ drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c | 9 +++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 7d58e294618d6..94ef422dc56a9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -177,7 +177,7 @@ static inline char iwl_drv_get_step(int step) return 'a' + step; } -static bool iwl_drv_is_wifi7_supported(struct iwl_trans *trans) +bool iwl_drv_is_wifi7_supported(struct iwl_trans *trans) { return trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && CSR_HW_RFID_TYPE(trans->info.hw_rf_id) >= IWL_CFG_RF_TYPE_FM; @@ -1859,13 
+1859,6 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) #if IS_ENABLED(CONFIG_IWLMLD) if (iwl_drv_is_wifi7_supported(drv->trans)) op = &iwlwifi_opmode_table[MLD_OP_MODE]; -#else - if (iwl_drv_is_wifi7_supported(drv->trans)) { - IWL_ERR(drv, - "IWLMLD needs to be compiled to support this firmware\n"); - mutex_unlock(&iwlwifi_opmode_table_mtx); - goto out_unbind; - } #endif IWL_INFO(drv, "loaded firmware version %s op_mode %s\n", diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h index a0b67e8aba8da..6e60953de2ec4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h @@ -78,6 +78,12 @@ struct iwl_drv *iwl_drv_start(struct iwl_trans *trans); */ void iwl_drv_stop(struct iwl_drv *drv); +/* + * iwl_drv_is_wifi7_supported - returns if wifi7 is supported + * If yes, iwlmld needs to be used to drive the device. + */ +bool iwl_drv_is_wifi7_supported(struct iwl_trans *trans); + /* * exported symbol management * diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c index 59307b5df4417..164d060ec617c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c @@ -4218,6 +4218,15 @@ int iwl_pci_gen1_2_probe(struct pci_dev *pdev, pdev->device, pdev->subsystem_device, info.hw_rev, info.hw_rf_id); +#if !IS_ENABLED(CONFIG_IWLMLD) + if (iwl_drv_is_wifi7_supported(iwl_trans)) { + IWL_ERR(iwl_trans, + "IWLMLD needs to be compiled to support this device\n"); + ret = -EOPNOTSUPP; + goto out_free_trans; + } +#endif + dev_info = iwl_pci_find_dev_info(pdev->device, pdev->subsystem_device, CSR_HW_RFID_TYPE(info.hw_rf_id), CSR_HW_RFID_IS_CDB(info.hw_rf_id), From d24076e0758ff8aa788e181c565683abae8bb397 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Oct 2025 11:45:06 +0300 Subject: [PATCH 320/867] 
wifi: iwlwifi: stop checking the firmware's error pointer It is not very clear what values we put in min_umac_error_event_table. For Ma (Meteor Lake), this value is wrong and we get the print: Not valid error log pointer ... Just remove the check. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.a64d0803150f.Ie2db385f68e17fb0adcdcb16e5bf0125289e177d@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 1 - drivers/net/wireless/intel/iwlwifi/cfg/8000.c | 1 - drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 1 - drivers/net/wireless/intel/iwlwifi/cfg/ax210.c | 1 - drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 1 - drivers/net/wireless/intel/iwlwifi/cfg/dr.c | 1 - drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 1 - drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 -- drivers/net/wireless/intel/iwlwifi/mld/fw.c | 6 +----- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 13 ++----------- 10 files changed, 3 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index ca488931a33c5..f0453f3f6ba63 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -38,7 +38,6 @@ static const struct iwl_family_base_params iwl_22000_base = { .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, .apmg_not_supported = true, .mac_addr_from_csr = 0x380, - .min_umac_error_event_table = 0x400000, .d3_debug_data_base_addr = 0x401000, .d3_debug_data_length = 60 * 1024, .mon_smem_regs = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c index b56574006ee0f..3c844cd419e89 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c @@ -50,7 +50,6 @@ static const struct iwl_family_base_params iwl8000_base = { .smem_offset = IWL8260_SMEM_OFFSET, .smem_len = IWL8260_SMEM_LEN, 
.apmg_not_supported = true, - .min_umac_error_event_table = 0x800000, }; static const struct iwl_tt_params iwl8000_tt_params = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index ac1fa291cf2f0..5872fc9b8caf4 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -41,7 +41,6 @@ static const struct iwl_family_base_params iwl9000_base = { .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, .apmg_not_supported = true, .mac_addr_from_csr = 0x380, - .min_umac_error_event_table = 0x800000, .d3_debug_data_base_addr = 0x401000, .d3_debug_data_length = 92 * 1024, .nvm_hw_section_num = 10, diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c b/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c index ddf3d313da5a6..582f61661062e 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/ax210.c @@ -33,7 +33,6 @@ static const struct iwl_family_base_params iwl_ax210_base = { .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, .apmg_not_supported = true, .mac_addr_from_csr = 0x380, - .min_umac_error_event_table = 0x400000, .d3_debug_data_base_addr = 0x401000, .d3_debug_data_length = 60 * 1024, .mon_smem_regs = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 18c9244ee8efd..7babb60463bef 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -38,7 +38,6 @@ static const struct iwl_family_base_params iwl_bz_base = { .smem_len = IWL_BZ_SMEM_LEN, .apmg_not_supported = true, .mac_addr_from_csr = 0x30, - .min_umac_error_event_table = 0xD0000, .d3_debug_data_base_addr = 0x401000, .d3_debug_data_length = 60 * 1024, .mon_smem_regs = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c index e53a785686c89..ad65951d5643c 100644 --- 
a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c @@ -33,7 +33,6 @@ static const struct iwl_family_base_params iwl_dr_base = { .smem_len = IWL_DR_SMEM_LEN, .apmg_not_supported = true, .mac_addr_from_csr = 0x30, - .min_umac_error_event_table = 0xD0000, .d3_debug_data_base_addr = 0x401000, .d3_debug_data_length = 60 * 1024, .mon_smem_regs = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index e9449b59114ac..abb6283dfb8f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -41,7 +41,6 @@ static const struct iwl_family_base_params iwl_sc_base = { .smem_len = IWL_SC_SMEM_LEN, .apmg_not_supported = true, .mac_addr_from_csr = 0x30, - .min_umac_error_event_table = 0xD0000, .d3_debug_data_base_addr = 0x401000, .d3_debug_data_length = 60 * 1024, .mon_smem_regs = { diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 0b34c9f90b3ff..e44936204068f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -170,7 +170,6 @@ struct iwl_fw_mon_regs { * for aggregation * @min_txq_size: minimum number of slots required in a TX queue * @gp2_reg_addr: GP2 (timer) register address - * @min_umac_error_event_table: minimum SMEM location of UMAC error table * @mon_dbgi_regs: monitor DBGI registers * @mon_dram_regs: monitor DRAM registers * @mon_smem_regs: monitor SMEM registers @@ -203,7 +202,6 @@ struct iwl_family_base_params { netdev_features_t features; u32 smem_offset; u32 smem_len; - u32 min_umac_error_event_table; u32 d3_debug_data_base_addr; u32 d3_debug_data_length; u32 min_txq_size; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index b3abfa1ec8102..19da521a4babc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ 
b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -167,11 +167,7 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, umac_error_table = le32_to_cpu(umac->dbg_ptrs.error_info_addr) & ~FW_ADDR_CACHE_CONTROL; - if (umac_error_table >= trans->mac_cfg->base->min_umac_error_event_table) - iwl_fw_umac_set_alive_err_table(trans, umac_error_table); - else - IWL_ERR(mld, "Not valid error log pointer 0x%08X\n", - umac_error_table); + iwl_fw_umac_set_alive_err_table(trans, umac_error_table); alive_data->valid = status == IWL_ALIVE_STATUS_OK; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 6b76ce35443d2..edae13755ee61 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -214,17 +214,8 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, ~FW_ADDR_CACHE_CONTROL; if (umac_error_table) { - if (umac_error_table >= - mvm->trans->mac_cfg->base->min_umac_error_event_table) { - iwl_fw_umac_set_alive_err_table(mvm->trans, - umac_error_table); - } else { - IWL_ERR(mvm, - "Not valid error log pointer 0x%08X for %s uCode\n", - umac_error_table, - (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) ? - "Init" : "RT"); - } + iwl_fw_umac_set_alive_err_table(mvm->trans, + umac_error_table); } alive_data->valid = status == IWL_ALIVE_STATUS_OK; From 7906c61a8faf6660bc2d5c6e71985be695a71065 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 19 Oct 2025 11:45:07 +0300 Subject: [PATCH 321/867] wifi: iwlwifi: mld: check the validity of noa_len Validate iwl_probe_resp_data_notif::noa_attr::len_low since we are using its value to determine the noa_len, which is later used for the NoA attribute. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.b127a2b57e8c.I7ccaf118d236fb39da5da351b95ad9b37b825bc2@changeid --- drivers/net/wireless/intel/iwlwifi/mld/iface.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index ed379825a9236..a5ececfc13e44 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -528,6 +528,19 @@ void iwl_mld_handle_probe_resp_data_notif(struct iwl_mld *mld, mld_link = &iwl_mld_vif_from_mac80211(vif)->deflink; + /* len_low should be 2 + n*13 (where n is the number of descriptors. + * 13 is the size of a NoA descriptor). We can have either one or two + * descriptors. + */ + if (IWL_FW_CHECK(mld, notif->noa_active && + notif->noa_attr.len_low != 2 + + sizeof(struct ieee80211_p2p_noa_desc) && + notif->noa_attr.len_low != 2 + + sizeof(struct ieee80211_p2p_noa_desc) * 2, + "Invalid noa_attr.len_low (%d)\n", + notif->noa_attr.len_low)) + return; + new_data = kzalloc(sizeof(*new_data), GFP_KERNEL); if (!new_data) return; From f67cf9aaae4681692dce78ce31ba08cdf9cc0705 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:08 +0300 Subject: [PATCH 322/867] wifi: iwlwifi: fix build when mvm/mld not configured When neither mvm nor mld are configured, we don't have the iwl_bz_mac_cfg symbol and thus cannot check for it. But in that case the relevant device entries aren't and cannot be present, so just ifdef the test code for that. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509170625.BAJBe7Bi-lkp@intel.com/ Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.615810979e7b.I9a215f955bb3208d99239be8496d19e0f186b4d0@changeid --- drivers/net/wireless/intel/iwlwifi/tests/devinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c index 3054bc0a9c679..6bf2ad18b0091 100644 --- a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c +++ b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c @@ -274,6 +274,7 @@ static void devinfo_pci_ids_config(struct kunit *test) if (s->device == PCI_ANY_ID || s->subdevice == PCI_ANY_ID) continue; +#if IS_ENABLED(CONFIG_IWLMVM) || IS_ENABLED(CONFIG_IWLMLD) /* * The check below only works for old (pre-CNVI) devices. Most * new have subdevice==ANY, so are already skipped, but for some @@ -281,6 +282,7 @@ static void devinfo_pci_ids_config(struct kunit *test) */ if (s->driver_data == (kernel_ulong_t)&iwl_bz_mac_cfg) continue; +#endif di = iwl_pci_find_dev_info(s->device, s->subdevice, 0, 0, 0, 0, true); From 50d0cafec60ce2c6ece236806df7fbef0cc8a0da Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:09 +0300 Subject: [PATCH 323/867] wifi: iwlwifi: bump core version for BZ/SC/DR Start supporting Core 100 FW on those devices. In addition, the move to the new Core scheme (instead of API scheme) will start at Core 100, not at Core 99 as previously planned. Adjust for that as well. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.00c28b4259da.Idd6281cd647f1b33f2572a8c107c3a3228e03665@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/dr.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 7babb60463bef..28005f25c38b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_BZ_UCODE_CORE_MAX 99 +#define IWL_BZ_UCODE_CORE_MAX 100 /* Lowest firmware API version supported */ #define IWL_BZ_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c index ad65951d5643c..c1c6174534d7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c @@ -9,7 +9,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_DR_UCODE_CORE_MAX 99 +#define IWL_DR_UCODE_CORE_MAX 100 /* Lowest firmware API version supported */ #define IWL_DR_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index abb6283dfb8f4..d9943d252ae05 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_SC_UCODE_CORE_MAX 99 +#define IWL_SC_UCODE_CORE_MAX 100 /* Lowest firmware API version supported */ #define IWL_SC_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 
94ef422dc56a9..3391f07b01de3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -348,8 +348,8 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) if (first) drv->fw_index = ucode_api_max; - else if (drv->fw_index == ENCODE_CORE_AS_API(99)) - drv->fw_index = 101; /* last API-scheme number below core 99 */ + else if (drv->fw_index == ENCODE_CORE_AS_API(100)) + drv->fw_index = 102; /* last API-scheme number below core 100 */ else drv->fw_index--; From 3e24ba621ba122ff765c8c9790800a05196fbbba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:10 +0300 Subject: [PATCH 324/867] wifi: iwlwifi: mvm/mld: report non-HT frames as 20 MHz Non-HT frames can only be encoded in 20 MHz, however, they could be duplicated on all/some of the subchannels (mostly used for RTS/CTS), in which case the firmware will report an estimate of the overall used bandwidth based on energy detected. This could be confusing so don't report it that way, always use 20 MHz for non-HT/legacy frames instead. Note that currently the value doesn't appear to be used by mac80211, it never checks the bandwidth field for legacy encodings. 
Signed-off-by: Johannes Berg Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.23e6695039ca.I3da7c542bde6de4362755f200248dbcc12aa246e@changeid --- drivers/net/wireless/intel/iwlwifi/mld/rx.c | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index 19e78d2fbf9b5..6a76e3fcb581c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -1309,6 +1309,9 @@ static void iwl_mld_set_rx_rate(struct iwl_mld *mld, iwl_mld_legacy_hw_idx_to_mac80211_idx(rate_n_flags, rx_status->band); + /* override BW - it could be DUP and indicate the wrong BW */ + rx_status->bw = RATE_INFO_BW_20; + /* valid rate */ if (rate >= 0 && rate <= 0xFF) { rx_status->rate_idx = rate; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 8c1bb3a7ffcaa..d0c0faae01221 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -519,6 +519,8 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, return; } rx_status->rate_idx = rate; + /* override BW - it could be DUP and indicate the wrong BW */ + rx_status->bw = RATE_INFO_BW_20; } #ifdef CONFIG_IWLWIFI_DEBUGFS From bd8a6e46e6b89f758669614d2aa1219e8ea048ef Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:11 +0300 Subject: [PATCH 325/867] wifi: iwlwifi: mld: use FW_CHECK on bad ROC notification If the firmware sends a ROC notification after the driver cancelled it, we can get into this WARN_ON(). Don't do that, use IWL_FW_CHECK() instead. 
Signed-off-by: Johannes Berg Tested-by: Emmanuel Grumbach Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.48aff2c8922e.Ie27b21eb26b67c8010d13ce9590751cad417d1ad@changeid --- drivers/net/wireless/intel/iwlwifi/mld/roc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/roc.c b/drivers/net/wireless/intel/iwlwifi/mld/roc.c index 4136c98030d04..4e37a288471e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/roc.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/roc.c @@ -231,7 +231,9 @@ void iwl_mld_handle_roc_notif(struct iwl_mld *mld, struct ieee80211_vif *vif; vif = iwl_mld_find_roc_vif(mld, activity); - if (WARN_ON(!vif)) + if (IWL_FW_CHECK(mld, !vif, + "unexpected ROC notif from FW for activity %d\n", + activity)) return; mld_vif = iwl_mld_vif_from_mac80211(vif); From 0d0e8149c6d77fe22dfb10b985ebedfbc192a2cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:12 +0300 Subject: [PATCH 326/867] wifi: iwlwifi: bump core version for BZ/SC/DR Start supporting Core 101 FW on these devices. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.23300d52cd8b.I6aad50aed469d7734c165086796dfa9cdf9d81bd@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/dr.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 28005f25c38b5..d25445bd1e5c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_BZ_UCODE_CORE_MAX 100 +#define IWL_BZ_UCODE_CORE_MAX 101 /* Lowest firmware API version supported */ #define IWL_BZ_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c index c1c6174534d7f..a279dcfd3083e 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c @@ -9,7 +9,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_DR_UCODE_CORE_MAX 100 +#define IWL_DR_UCODE_CORE_MAX 101 /* Lowest firmware API version supported */ #define IWL_DR_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index d9943d252ae05..ee00b2af7a1d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_SC_UCODE_CORE_MAX 100 +#define IWL_SC_UCODE_CORE_MAX 101 /* Lowest firmware API version supported */ #define IWL_SC_UCODE_API_MIN 100 From 7ed47d42943fba8ced505f62d4358f63963bb968 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Oct 2025 11:45:13 +0300 Subject: [PATCH 327/867] wifi: iwlwifi: disable EHT 
if the device doesn't allow it We have a few devices that don't allow EHT. Make sure we reflect this towards mac80211 so that we won't try to enable it. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.71121f4e5557.I49e2329d4121f9e52d0889156d0c3e8778e27d88@changeid --- .../net/wireless/intel/iwlwifi/cfg/rf-fm.c | 1 + .../net/wireless/intel/iwlwifi/cfg/rf-wh.c | 23 +++++++++++++++++++ .../net/wireless/intel/iwlwifi/iwl-config.h | 5 +++- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +++-- 5 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/rf-fm.c b/drivers/net/wireless/intel/iwlwifi/cfg/rf-fm.c index 456a666c8dfdf..fd82050e33a3c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/rf-fm.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/rf-fm.c @@ -19,6 +19,7 @@ .non_shared_ant = ANT_B, \ .vht_mu_mimo_supported = true, \ .uhb_supported = true, \ + .eht_supported = true, \ .num_rbds = IWL_NUM_RBDS_EHT, \ .nvm_ver = IWL_FM_NVM_VERSION, \ .nvm_type = IWL_NVM_EXT diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c b/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c index b8c6b06e70991..b5803ea1eb782 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/rf-wh.c @@ -4,8 +4,31 @@ */ #include "iwl-config.h" +/* NVM versions */ +#define IWL_WH_NVM_VERSION 0x0a1d + +#define IWL_DEVICE_WH \ + .ht_params = { \ + .stbc = true, \ + .ldpc = true, \ + .ht40_bands = BIT(NL80211_BAND_2GHZ) | \ + BIT(NL80211_BAND_5GHZ), \ + }, \ + .led_mode = IWL_LED_RF_STATE, \ + .non_shared_ant = ANT_B, \ + .vht_mu_mimo_supported = true, \ + .uhb_supported = true, \ + .num_rbds = IWL_NUM_RBDS_EHT, \ + .nvm_ver = IWL_WH_NVM_VERSION, \ + .nvm_type = IWL_NVM_EXT + /* currently iwl_rf_wh/iwl_rf_wh_160mhz are just defines for the FM ones */ +const struct iwl_rf_cfg iwl_rf_wh_non_eht = { + 
IWL_DEVICE_WH, + .eht_supported = false, +}; + const char iwl_killer_be1775s_name[] = "Killer(R) Wi-Fi 7 BE1775s 320MHz Wireless Network Adapter (BE211D2W)"; const char iwl_killer_be1775i_name[] = diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index e44936204068f..0a5a683325efa 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -416,6 +416,7 @@ struct iwl_mac_cfg { * @vht_mu_mimo_supported: VHT MU-MIMO support * @nvm_type: see &enum iwl_nvm_type * @uhb_supported: ultra high band channels supported + * @eht_supported: EHT supported * @num_rbds: number of receive buffer descriptors to use * (only used for multi-queue capable devices) * @@ -448,7 +449,8 @@ struct iwl_rf_cfg { host_interrupt_operation_mode:1, lp_xtal_workaround:1, vht_mu_mimo_supported:1, - uhb_supported:1; + uhb_supported:1, + eht_supported:1; u8 valid_tx_ant; u8 valid_rx_ant; u8 non_shared_ant; @@ -742,6 +744,7 @@ extern const struct iwl_rf_cfg iwl_rf_fm; extern const struct iwl_rf_cfg iwl_rf_fm_160mhz; #define iwl_rf_wh iwl_rf_fm #define iwl_rf_wh_160mhz iwl_rf_fm_160mhz +extern const struct iwl_rf_cfg iwl_rf_wh_non_eht; #define iwl_rf_pe iwl_rf_fm #endif /* CONFIG_IWLMLD */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 23465e4c4b399..e021fc57d85d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -2080,7 +2080,7 @@ struct iwl_nvm_data *iwl_get_nvm(struct iwl_trans *trans, !!(mac_flags & NVM_MAC_SKU_FLAGS_BAND_5_2_ENABLED); nvm->sku_cap_mimo_disabled = !!(mac_flags & NVM_MAC_SKU_FLAGS_MIMO_DISABLED); - if (CSR_HW_RFID_TYPE(trans->info.hw_rf_id) >= IWL_CFG_RF_TYPE_FM) + if (trans->cfg->eht_supported) nvm->sku_cap_11be_enable = true; /* Initialize PHY sku data */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c 
b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index de04a84def0d8..73001cdce13aa 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1061,8 +1061,10 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { /* WH RF */ IWL_DEV_INFO(iwl_rf_wh, iwl_be211_name, RF_TYPE(WH)), - IWL_DEV_INFO(iwl_rf_wh, iwl_ax221_name, RF_TYPE(WH), SUBDEV(0x0514)), - IWL_DEV_INFO(iwl_rf_wh, iwl_ax221_name, RF_TYPE(WH), SUBDEV(0x4514)), + IWL_DEV_INFO(iwl_rf_wh_non_eht, iwl_ax221_name, RF_TYPE(WH), + SUBDEV(0x0514)), + IWL_DEV_INFO(iwl_rf_wh_non_eht, iwl_ax221_name, RF_TYPE(WH), + SUBDEV(0x4514)), IWL_DEV_INFO(iwl_rf_wh_160mhz, iwl_be213_name, RF_TYPE(WH), BW_LIMITED), /* PE RF */ From 9e69bcb527ea66288d49b97a85e33ce429825079 Mon Sep 17 00:00:00 2001 From: Nidhish A N Date: Sun, 19 Oct 2025 11:45:14 +0300 Subject: [PATCH 328/867] wifi: iwlwifi: mld: Move EMLSR prints to IWL_DL_EHT Modify EMLSR debug prints to use IWL_DL_EHT instead of IWL_DL_INFO. This will allow better communication with validation as they might enable only IWL_DL_EHT or IWL_DL_INFO as required. Add prints to log attempt to switch links when missed beacons exceed threshold. Print both link ids and missed beacons when in EMLSR mode. 
Signed-off-by: Nidhish A N Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.3bfc2bc8f410.I405ab2aa81af1ba0ea5eaff343eae1778f2035d9@changeid --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 9 +- drivers/net/wireless/intel/iwlwifi/mld/mlo.c | 97 +++++++++---------- drivers/net/wireless/intel/iwlwifi/mld/sta.c | 2 +- 3 files changed, 52 insertions(+), 56 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index 738f80fe0c50a..deb1e7227dd85 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -571,8 +571,12 @@ void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, /* Not in EMLSR and we can't hear the link. * Try to switch to a better link. EMLSR case is handled below. */ - if (!iwl_mld_emlsr_active(vif)) + if (!iwl_mld_emlsr_active(vif)) { + IWL_DEBUG_EHT(mld, + "missed beacons exceeds threshold. link_id=%u. Try to switch to a better link.\n", + link_id); iwl_mld_int_mlo_scan(mld, vif); + } } /* no more logic if we're not in EMLSR */ @@ -591,7 +595,8 @@ void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, return; IWL_DEBUG_EHT(mld, - "missed bcn on the other link (link_id=%u): %u\n", + "missed bcn link_id=%u: %u consecutive=%u, other link_id=%u: %u\n", + link_id, missed_bcon, missed_bcon_since_rx, other_link->link_id, scnd_lnk_bcn_lost); /* Exit EMLSR if we lost more than diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index fa04fbe06656a..c6b151f269216 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -31,11 +31,9 @@ static void iwl_mld_print_emlsr_blocked(struct iwl_mld *mld, u32 mask) { #define NAME_FMT(x) "%s" #define NAME_PR(x) (mask & IWL_MLD_EMLSR_BLOCKED_##x) ? 
"[" #x "]" : "", - IWL_DEBUG_INFO(mld, - "EMLSR blocked = " HANDLE_EMLSR_BLOCKED_REASONS(NAME_FMT) - " (0x%x)\n", - HANDLE_EMLSR_BLOCKED_REASONS(NAME_PR) - mask); + IWL_DEBUG_EHT(mld, + "EMLSR blocked = " HANDLE_EMLSR_BLOCKED_REASONS(NAME_FMT) + " (0x%x)\n", HANDLE_EMLSR_BLOCKED_REASONS(NAME_PR) mask); #undef NAME_FMT #undef NAME_PR } @@ -72,11 +70,9 @@ static void iwl_mld_print_emlsr_exit(struct iwl_mld *mld, u32 mask) { #define NAME_FMT(x) "%s" #define NAME_PR(x) (mask & IWL_MLD_EMLSR_EXIT_##x) ? "[" #x "]" : "", - IWL_DEBUG_INFO(mld, - "EMLSR exit = " HANDLE_EMLSR_EXIT_REASONS(NAME_FMT) - " (0x%x)\n", - HANDLE_EMLSR_EXIT_REASONS(NAME_PR) - mask); + IWL_DEBUG_EHT(mld, + "EMLSR exit = " HANDLE_EMLSR_EXIT_REASONS(NAME_FMT) + " (0x%x)\n", HANDLE_EMLSR_EXIT_REASONS(NAME_PR) mask); #undef NAME_FMT #undef NAME_PR } @@ -170,10 +166,10 @@ static void iwl_mld_check_emlsr_prevention(struct iwl_mld *mld, WARN_ON(mld_vif->emlsr.exit_repeat_count > 3); } - IWL_DEBUG_INFO(mld, - "Preventing EMLSR for %ld seconds due to %u exits with the reason = %s (0x%x)\n", - delay / HZ, mld_vif->emlsr.exit_repeat_count, - iwl_mld_get_emlsr_exit_string(reason), reason); + IWL_DEBUG_EHT(mld, + "Preventing EMLSR for %ld seconds due to %u exits with the reason = %s (0x%x)\n", + delay / HZ, mld_vif->emlsr.exit_repeat_count, + iwl_mld_get_emlsr_exit_string(reason), reason); wiphy_delayed_work_queue(mld->wiphy, &mld_vif->emlsr.prevent_done_wk, delay); @@ -217,10 +213,10 @@ static int _iwl_mld_exit_emlsr(struct iwl_mld *mld, struct ieee80211_vif *vif, link_to_keep = __ffs(vif->active_links); new_active_links = BIT(link_to_keep); - IWL_DEBUG_INFO(mld, - "Exiting EMLSR. reason = %s (0x%x). Current active links=0x%x, new active links = 0x%x\n", - iwl_mld_get_emlsr_exit_string(exit), exit, - vif->active_links, new_active_links); + IWL_DEBUG_EHT(mld, + "Exiting EMLSR. reason = %s (0x%x). 
Current active links=0x%x, new active links = 0x%x\n", + iwl_mld_get_emlsr_exit_string(exit), exit, + vif->active_links, new_active_links); if (sync) ret = ieee80211_set_active_links(vif, new_active_links); @@ -262,9 +258,8 @@ static int _iwl_mld_emlsr_block(struct iwl_mld *mld, struct ieee80211_vif *vif, mld_vif->emlsr.blocked_reasons |= reason; - IWL_DEBUG_INFO(mld, - "Blocking EMLSR mode. reason = %s (0x%x)\n", - iwl_mld_get_emlsr_blocked_string(reason), reason); + IWL_DEBUG_EHT(mld, "Blocking EMLSR mode. reason = %s (0x%x)\n", + iwl_mld_get_emlsr_blocked_string(reason), reason); iwl_mld_print_emlsr_blocked(mld, mld_vif->emlsr.blocked_reasons); if (reason == IWL_MLD_EMLSR_BLOCKED_TPT) @@ -335,9 +330,8 @@ void iwl_mld_unblock_emlsr(struct iwl_mld *mld, struct ieee80211_vif *vif, mld_vif->emlsr.blocked_reasons &= ~reason; - IWL_DEBUG_INFO(mld, - "Unblocking EMLSR mode. reason = %s (0x%x)\n", - iwl_mld_get_emlsr_blocked_string(reason), reason); + IWL_DEBUG_EHT(mld, "Unblocking EMLSR mode. 
reason = %s (0x%x)\n", + iwl_mld_get_emlsr_blocked_string(reason), reason); iwl_mld_print_emlsr_blocked(mld, mld_vif->emlsr.blocked_reasons); if (reason == IWL_MLD_EMLSR_BLOCKED_TPT) @@ -348,7 +342,7 @@ void iwl_mld_unblock_emlsr(struct iwl_mld *mld, struct ieee80211_vif *vif, if (mld_vif->emlsr.blocked_reasons) return; - IWL_DEBUG_INFO(mld, "EMLSR is unblocked\n"); + IWL_DEBUG_EHT(mld, "EMLSR is unblocked\n"); iwl_mld_int_mlo_scan(mld, vif); } @@ -365,18 +359,17 @@ iwl_mld_vif_iter_emlsr_mode_notif(void *data, u8 *mac, switch (action) { case ESR_RECOMMEND_LEAVE: - IWL_DEBUG_INFO(mld_vif->mld, - "FW recommend leave reason = 0x%x\n", - le32_to_cpu(notif->leave_reason_mask)); + IWL_DEBUG_EHT(mld_vif->mld, + "FW recommend leave reason = 0x%x\n", + le32_to_cpu(notif->leave_reason_mask)); iwl_mld_exit_emlsr(mld_vif->mld, vif, IWL_MLD_EMLSR_EXIT_FW_REQUEST, iwl_mld_get_primary_link(vif)); break; case ESR_FORCE_LEAVE: - IWL_DEBUG_INFO(mld_vif->mld, - "FW force leave reason = 0x%x\n", - le32_to_cpu(notif->leave_reason_mask)); + IWL_DEBUG_EHT(mld_vif->mld, "FW force leave reason = 0x%x\n", + le32_to_cpu(notif->leave_reason_mask)); fallthrough; case ESR_RECOMMEND_ENTER: default: @@ -412,11 +405,12 @@ void iwl_mld_handle_emlsr_trans_fail_notif(struct iwl_mld *mld, struct ieee80211_bss_conf *bss_conf = iwl_mld_fw_id_to_link_conf(mld, fw_link_id); - IWL_DEBUG_INFO(mld, "Failed to %s EMLSR on link %d (FW: %d), reason %d\n", - le32_to_cpu(notif->activation) ? "enter" : "exit", - bss_conf ? bss_conf->link_id : -1, - le32_to_cpu(notif->link_id), - le32_to_cpu(notif->err_code)); + IWL_DEBUG_EHT(mld, + "Failed to %s EMLSR on link %d (FW: %d), reason %d\n", + le32_to_cpu(notif->activation) ? "enter" : "exit", + bss_conf ? 
bss_conf->link_id : -1, + le32_to_cpu(notif->link_id), + le32_to_cpu(notif->err_code)); if (IWL_FW_CHECK(mld, !bss_conf, "FW reported failure to %sactivate EMLSR on a non-existing link: %d\n", @@ -590,8 +584,8 @@ void iwl_mld_emlsr_check_tpt(struct wiphy *wiphy, struct wiphy_work *wk) spin_unlock_bh(&queue_counter->lock); } - IWL_DEBUG_INFO(mld, "total Tx MPDUs: %ld. total Rx MPDUs: %ld\n", - total_tx, total_rx); + IWL_DEBUG_EHT(mld, "total Tx MPDUs: %ld. total Rx MPDUs: %ld\n", + total_tx, total_rx); /* If we don't have enough MPDUs - exit EMLSR */ if (total_tx < IWL_MLD_ENTER_EMLSR_TPT_THRESH && @@ -605,8 +599,8 @@ void iwl_mld_emlsr_check_tpt(struct wiphy *wiphy, struct wiphy_work *wk) if (sec_link_id == -1) goto schedule; - IWL_DEBUG_INFO(mld, "Secondary Link %d: Tx MPDUs: %ld. Rx MPDUs: %ld\n", - sec_link_id, sec_link_tx, sec_link_rx); + IWL_DEBUG_EHT(mld, "Secondary Link %d: Tx MPDUs: %ld. Rx MPDUs: %ld\n", + sec_link_id, sec_link_tx, sec_link_rx); /* Calculate the percentage of the secondary link TX/RX */ sec_link_tx_perc = total_tx ? sec_link_tx * 100 / total_tx : 0; @@ -703,10 +697,8 @@ iwl_mld_emlsr_disallowed_with_link(struct iwl_mld *mld, ret |= IWL_MLD_EMLSR_EXIT_CSA; if (ret) { - IWL_DEBUG_INFO(mld, - "Link %d is not allowed for EMLSR as %s\n", - link->link_id, - primary ? "primary" : "secondary"); + IWL_DEBUG_EHT(mld, "Link %d is not allowed for EMLSR as %s\n", + link->link_id, primary ? 
"primary" : "secondary"); iwl_mld_print_emlsr_exit(mld, ret); } @@ -870,13 +862,12 @@ iwl_mld_emlsr_pair_state(struct ieee80211_vif *vif, reason_mask |= IWL_MLD_EMLSR_EXIT_CHAN_LOAD; if (reason_mask) { - IWL_DEBUG_INFO(mld, - "Links %d and %d are not a valid pair for EMLSR\n", - a->link_id, b->link_id); - IWL_DEBUG_INFO(mld, - "Links bandwidth are: %d and %d\n", - nl80211_chan_width_to_mhz(a->chandef->width), - nl80211_chan_width_to_mhz(b->chandef->width)); + IWL_DEBUG_EHT(mld, + "Links %d and %d are not a valid pair for EMLSR\n", + a->link_id, b->link_id); + IWL_DEBUG_EHT(mld, "Links bandwidth are: %d and %d\n", + nl80211_chan_width_to_mhz(a->chandef->width), + nl80211_chan_width_to_mhz(b->chandef->width)); iwl_mld_print_emlsr_exit(mld, reason_mask); } @@ -994,8 +985,8 @@ static void _iwl_mld_select_links(struct iwl_mld *mld, } set_active: - IWL_DEBUG_INFO(mld, "Link selection result: 0x%x. Primary = %d\n", - new_active, new_primary); + IWL_DEBUG_EHT(mld, "Link selection result: 0x%x. Primary = %d\n", + new_active, new_primary); mld_vif->emlsr.selected_primary = new_primary; mld_vif->emlsr.selected_links = new_active; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.c b/drivers/net/wireless/intel/iwlwifi/mld/sta.c index 5cdbfa29a2027..61ecc33116cf0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.c @@ -890,7 +890,7 @@ static void iwl_mld_count_mpdu(struct ieee80211_link_sta *link_sta, int queue, sizeof(queue_counter->per_link)); queue_counter->window_start_time = jiffies; - IWL_DEBUG_INFO(mld, "MPDU counters are cleared\n"); + IWL_DEBUG_EHT(mld, "MPDU counters are cleared\n"); } link_counter = &queue_counter->per_link[mld_link->fw_id]; From 5ee10092eb77dee1bed468777ff8e16130f21877 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:45:15 +0300 Subject: [PATCH 329/867] wifi: iwlwifi: cfg: fix a few device names There are going to be some devices called BN203 instead of BN201, adjust 
the names accordingly. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.2ad0f42ffb57.I2c7864d33f0d7d3dc49381949571c4ce620a9723@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/rf-pe.c | 1 + drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 7 ++++--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/rf-pe.c b/drivers/net/wireless/intel/iwlwifi/cfg/rf-pe.c index 483f21659effc..408b9850bd104 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/rf-pe.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/rf-pe.c @@ -12,5 +12,6 @@ const char iwl_killer_bn1850i_name[] = "Killer(R) Wi-Fi 8 BN1850i 320MHz Wireless Network Adapter (BN201.NGW)"; const char iwl_bn201_name[] = "Intel(R) Wi-Fi 8 BN201"; +const char iwl_bn203_name[] = "Intel(R) Wi-Fi 8 BN203"; const char iwl_be221_name[] = "Intel(R) Wi-Fi 7 BE221"; const char iwl_be223_name[] = "Intel(R) Wi-Fi 7 BE223"; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 0a5a683325efa..076810ee5d341 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -686,6 +686,7 @@ extern const char iwl_be211_name[]; extern const char iwl_killer_bn1850w2_name[]; extern const char iwl_killer_bn1850i_name[]; extern const char iwl_bn201_name[]; +extern const char iwl_bn203_name[]; extern const char iwl_be221_name[]; extern const char iwl_be223_name[]; extern const char iwl_ax221_name[]; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 73001cdce13aa..dc99e7ac47261 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1069,9 +1069,10 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { /* PE RF */ IWL_DEV_INFO(iwl_rf_pe, 
iwl_bn201_name, RF_TYPE(PE)), - IWL_DEV_INFO(iwl_rf_pe, iwl_be223_name, RF_TYPE(PE), SUBDEV(0x0524)), - IWL_DEV_INFO(iwl_rf_pe, iwl_be223_name, RF_TYPE(PE), SUBDEV(0x4524)), - IWL_DEV_INFO(iwl_rf_pe, iwl_be221_name, RF_TYPE(PE), SUBDEV(0x0324)), + IWL_DEV_INFO(iwl_rf_pe, iwl_be223_name, RF_TYPE(PE), + SUBDEV_MASKED(0x0524, 0xFFF)), + IWL_DEV_INFO(iwl_rf_pe, iwl_bn203_name, RF_TYPE(PE), + SUBDEV_MASKED(0x0324, 0xFFF)), /* Killer */ IWL_DEV_INFO(iwl_rf_wh, iwl_killer_be1775s_name, SUBDEV(0x1776)), From 75dd87e3f181bbf7d333c69fa45ef49eb75e4acd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Oct 2025 11:45:16 +0300 Subject: [PATCH 330/867] wifi: iwlwifi: mld: check for NULL pointer after kmalloc Coverity complained that we didn't add a NULL check for the link we allocate. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019114304.d1f958160c5a.Icc891c14c633c3b8625372680fdc67ca33c83cc7@changeid --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index deb1e7227dd85..27ae8bd6f407c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -465,10 +465,13 @@ int iwl_mld_add_link(struct iwl_mld *mld, int ret; if (!link) { - if (is_deflink) + if (is_deflink) { link = &mld_vif->deflink; - else + } else { link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + } } else { WARN_ON(!mld->fw_status.in_hw_restart); } From 3df28496673bd8009f1cd3a85a63650c96e369f4 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 17 Oct 2025 12:11:28 +0800 Subject: [PATCH 331/867] wifi: iwlwifi: mld: add null check for kzalloc() in iwl_mld_send_proto_offload() Add a missing NULL pointer check after kzalloc() in iwl_mld_send_proto_offload(). 
Without this check, a failed allocation could lead to a NULL dereference. Fixes: d1e879ec600f9 ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Li Qiang Link: https://patch.msgid.link/20251017041128.1379715-1-liqiang01@kylinos.cn Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 1d4282a21f09e..dd85be94433cc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1794,6 +1794,10 @@ iwl_mld_send_proto_offload(struct iwl_mld *mld, u32 enabled = 0; cmd = kzalloc(hcmd.len[0], GFP_KERNEL); + if (!cmd) { + IWL_DEBUG_WOWLAN(mld, "Failed to allocate proto offload cmd\n"); + return -ENOMEM; + } #if IS_ENABLED(CONFIG_IPV6) struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(vif); From c51aa14be9c4ad9f3d45f9dd2890776cfbccb55a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 23 Oct 2025 18:22:51 +0200 Subject: [PATCH 332/867] dt-bindings: net: cdns,macb: add Mobileye EyeQ5 ethernet interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add "cdns,eyeq5-gem" as compatible for the integrated GEM block inside Mobileye EyeQ5 SoCs. It is different from other compatibles in two main ways: (1) it requires a generic PHY and (2) it is better to keep TCP Segmentation Offload (TSO) disabled. 
Reviewed-by: Andrew Lunn Acked-by: Conor Dooley Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251023-macb-eyeq5-v3-1-af509422c204@bootlin.com Signed-off-by: Paolo Abeni --- Documentation/devicetree/bindings/net/cdns,macb.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index 02f14a0b72f9c..ea8337846ab26 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -57,6 +57,7 @@ properties: - cdns,np4-macb # NP4 SoC devices - microchip,sama7g5-emac # Microchip SAMA7G5 ethernet interface - microchip,sama7g5-gem # Microchip SAMA7G5 gigabit ethernet interface + - mobileye,eyeq5-gem # Mobileye EyeQ5 SoCs - raspberrypi,rp1-gem # Raspberry Pi RP1 gigabit ethernet interface - sifive,fu540-c000-gem # SiFive FU540-C000 SoC @@ -183,6 +184,15 @@ allOf: reg: maxItems: 1 + - if: + properties: + compatible: + contains: + const: mobileye,eyeq5-gem + then: + required: + - phys + unevaluatedProperties: false examples: From ae7a9585ea6974bd7a772fee96bb8514e250dbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 23 Oct 2025 18:22:52 +0200 Subject: [PATCH 333/867] net: macb: match skb_reserve(skb, NET_IP_ALIGN) with HW alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If HW is RSC capable, it cannot add dummy bytes at the start of IP packets. Alignment (ie number of dummy bytes) is configured using the RBOF field inside the NCFGR register. On the software side, the skb_reserve(skb, NET_IP_ALIGN) call must only be done if those dummy bytes are added by the hardware; notice the skb_reserve() is done AFTER writing the address to the device. We cannot do the skb_reserve() call BEFORE writing the address because the address field ignores the low 2/3 bits. 
Conclusion: in some cases, we risk not being able to respect the NET_IP_ALIGN value (which is picked based on unaligned CPU access performance). Signed-off-by: Théo Lebrun Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251023-macb-eyeq5-v3-2-af509422c204@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb.h | 3 +++ drivers/net/ethernet/cadence/macb_main.c | 23 ++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 5b7d4cdb204d8..93e8dd092313e 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -537,6 +537,8 @@ /* Bitfields in DCFG6. */ #define GEM_PBUF_LSO_OFFSET 27 #define GEM_PBUF_LSO_SIZE 1 +#define GEM_PBUF_RSC_OFFSET 26 +#define GEM_PBUF_RSC_SIZE 1 #define GEM_PBUF_CUTTHRU_OFFSET 25 #define GEM_PBUF_CUTTHRU_SIZE 1 #define GEM_DAW64_OFFSET 23 @@ -775,6 +777,7 @@ #define MACB_CAPS_MACB_IS_GEM BIT(20) #define MACB_CAPS_DMA_64B BIT(21) #define MACB_CAPS_DMA_PTP BIT(22) +#define MACB_CAPS_RSC BIT(23) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 39673f5c3337f..be3d0c2313a16 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1300,8 +1300,19 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); macb_set_addr(bp, desc, paddr); - /* properly align Ethernet header */ - skb_reserve(skb, NET_IP_ALIGN); + /* Properly align Ethernet header. + * + * Hardware can add dummy bytes if asked using the RBOF + * field inside the NCFGR register. That feature isn't + * available if hardware is RSC capable. + * + * We cannot fallback to doing the 2-byte shift before + * DMA mapping because the address field does not allow + * setting the low 2/3 bits. + * It is 3 bits if HW_DMA_CAP_PTP, else 2 bits. 
+ */ + if (!(bp->caps & MACB_CAPS_RSC)) + skb_reserve(skb, NET_IP_ALIGN); } else { desc->ctrl = 0; dma_wmb(); @@ -2773,7 +2784,11 @@ static void macb_init_hw(struct macb *bp) macb_set_hwaddr(bp); config = macb_mdc_clk_div(bp); - config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ + /* Make eth data aligned. + * If RSC capable, that offset is ignored by HW. + */ + if (!(bp->caps & MACB_CAPS_RSC)) + config |= MACB_BF(RBOF, NET_IP_ALIGN); config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ if (bp->caps & MACB_CAPS_JUMBO) config |= MACB_BIT(JFRAME); /* Enable jumbo frames */ @@ -4321,6 +4336,8 @@ static void macb_configure_caps(struct macb *bp, dcfg = gem_readl(bp, DCFG2); if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0) bp->caps |= MACB_CAPS_FIFO_MODE; + if (GEM_BFEXT(PBUF_RSC, gem_readl(bp, DCFG6))) + bp->caps |= MACB_CAPS_RSC; if (gem_has_ptp(bp)) { if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5))) dev_err(&bp->pdev->dev, From 7a3d209145d17b4e0d24a51864fb2def1f5ecf0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 23 Oct 2025 18:22:53 +0200 Subject: [PATCH 334/867] net: macb: add no LSO capability (MACB_CAPS_NO_LSO) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LSO is runtime-detected using the PBUF_LSO field inside register DCFG6. Allow disabling that feature if it is broken by using bp->caps coming from match data. 
Reviewed-by: Andrew Lunn Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251023-macb-eyeq5-v3-3-af509422c204@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb.h | 1 + drivers/net/ethernet/cadence/macb_main.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 93e8dd092313e..05bfa9bd47821 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -778,6 +778,7 @@ #define MACB_CAPS_DMA_64B BIT(21) #define MACB_CAPS_DMA_PTP BIT(22) #define MACB_CAPS_RSC BIT(23) +#define MACB_CAPS_NO_LSO BIT(24) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index be3d0c2313a16..8b688a6cb2f94 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4564,8 +4564,11 @@ static int macb_init(struct platform_device *pdev) /* Set features */ dev->hw_features = NETIF_F_SG; - /* Check LSO capability */ - if (GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6))) + /* Check LSO capability; runtime detection can be overridden by a cap + * flag if the hardware is known to be buggy + */ + if (!(bp->caps & MACB_CAPS_NO_LSO) && + GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6))) dev->hw_features |= MACB_NETIF_LSO; /* Checksum offload is only available on gem with packet buffer */ From 3f7e51cd5fbf4d970b14956ee9464515bb40666f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 23 Oct 2025 18:22:54 +0200 Subject: [PATCH 335/867] net: macb: rename bp->sgmii_phy field to bp->phy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bp->sgmii_phy field is initialised at probe by init_reset_optional() if bp->phy_interface == PHY_INTERFACE_MODE_SGMII. It gets used by: - zynqmp_config: "cdns,zynqmp-gem" or "xlnx,zynqmp-gem" compatibles. 
- mpfs_config: "microchip,mpfs-macb" compatible. - versal_config: "xlnx,versal-gem" compatible. Make name more generic as EyeQ5 requires the PHY in SGMII & RGMII cases. Drop "for ZynqMP SGMII mode" comment that is already a lie, as it gets used on Microchip platforms as well. And soon it won't be SGMII-only. Reviewed-by: Andrew Lunn Reviewed-by: Maxime Chevallier Signed-off-by: Théo Lebrun Link: https://patch.msgid.link/20251023-macb-eyeq5-v3-4-af509422c204@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb.h | 2 +- drivers/net/ethernet/cadence/macb_main.c | 26 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 05bfa9bd47821..87414a2ddf6e3 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1341,7 +1341,7 @@ struct macb { struct macb_ptp_info *ptp_info; /* macb-ptp interface */ - struct phy *sgmii_phy; /* for ZynqMP SGMII mode */ + struct phy *phy; spinlock_t tsu_clk_lock; /* gem tsu clock locking */ unsigned int tsu_rate; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8b688a6cb2f94..44188e7eee569 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2965,7 +2965,7 @@ static int macb_open(struct net_device *dev) macb_init_hw(bp); - err = phy_power_on(bp->sgmii_phy); + err = phy_power_on(bp->phy); if (err) goto reset_hw; @@ -2981,7 +2981,7 @@ static int macb_open(struct net_device *dev) return 0; phy_off: - phy_power_off(bp->sgmii_phy); + phy_power_off(bp->phy); reset_hw: macb_reset_hw(bp); @@ -3013,7 +3013,7 @@ static int macb_close(struct net_device *dev) phylink_stop(bp->phylink); phylink_disconnect_phy(bp->phylink); - phy_power_off(bp->sgmii_phy); + phy_power_off(bp->phy); spin_lock_irqsave(&bp->lock, flags); macb_reset_hw(bp); @@ -5141,13 +5141,13 @@ static int 
init_reset_optional(struct platform_device *pdev) if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { /* Ensure PHY device used in SGMII mode is ready */ - bp->sgmii_phy = devm_phy_optional_get(&pdev->dev, NULL); + bp->phy = devm_phy_optional_get(&pdev->dev, NULL); - if (IS_ERR(bp->sgmii_phy)) - return dev_err_probe(&pdev->dev, PTR_ERR(bp->sgmii_phy), + if (IS_ERR(bp->phy)) + return dev_err_probe(&pdev->dev, PTR_ERR(bp->phy), "failed to get SGMII PHY\n"); - ret = phy_init(bp->sgmii_phy); + ret = phy_init(bp->phy); if (ret) return dev_err_probe(&pdev->dev, ret, "failed to init SGMII PHY\n"); @@ -5176,7 +5176,7 @@ static int init_reset_optional(struct platform_device *pdev) /* Fully reset controller at hardware level if mapped in device tree */ ret = device_reset_optional(&pdev->dev); if (ret) { - phy_exit(bp->sgmii_phy); + phy_exit(bp->phy); return dev_err_probe(&pdev->dev, ret, "failed to reset controller"); } @@ -5184,7 +5184,7 @@ static int init_reset_optional(struct platform_device *pdev) err_out_phy_exit: if (ret) - phy_exit(bp->sgmii_phy); + phy_exit(bp->phy); return ret; } @@ -5594,7 +5594,7 @@ static int macb_probe(struct platform_device *pdev) mdiobus_free(bp->mii_bus); err_out_phy_exit: - phy_exit(bp->sgmii_phy); + phy_exit(bp->phy); err_out_free_netdev: free_netdev(dev); @@ -5618,7 +5618,7 @@ static void macb_remove(struct platform_device *pdev) if (dev) { bp = netdev_priv(dev); unregister_netdev(dev); - phy_exit(bp->sgmii_phy); + phy_exit(bp->phy); mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); @@ -5645,7 +5645,7 @@ static int __maybe_unused macb_suspend(struct device *dev) u32 tmp; if (!device_may_wakeup(&bp->dev->dev)) - phy_exit(bp->sgmii_phy); + phy_exit(bp->phy); if (!netif_running(netdev)) return 0; @@ -5774,7 +5774,7 @@ static int __maybe_unused macb_resume(struct device *dev) int err; if (!device_may_wakeup(&bp->dev->dev)) - phy_init(bp->sgmii_phy); + phy_init(bp->phy); if (!netif_running(netdev)) return 0; From 
48cf0be9b9a66ea64192beb215911d3d7c94a409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 23 Oct 2025 18:22:55 +0200 Subject: [PATCH 336/867] net: macb: Add "mobileye,eyeq5-gem" compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the two GEM instances inside Mobileye EyeQ5 SoCs, using compatible "mobileye,eyeq5-gem". With it, add a custom init sequence that must grab a generic PHY and initialise it. We use bp->phy in both RGMII and SGMII cases. Tell our mode by adding a phy_set_mode_ext() during macb_open(), before phy_power_on(). We are the first users of bp->phy that use it in non-SGMII cases. The phy_set_mode_ext() call is made unconditionally. It cannot cause issues on platforms where !bp->phy or !bp->phy->ops->set_mode as, in those cases, the call is a no-op (returning zero). From reading upstream DTS, we can figure out that no platform has a bp->phy and a PHY driver that has a .set_mode() implementation: - cdns,zynqmp-gem: no DTS upstream. - microchip,mpfs-macb: microchip/mpfs.dtsi, &mac0..1, no PHY attached. - xlnx,versal-gem: xilinx/versal-net.dtsi, &gem0..1, no PHY attached. - xlnx,zynqmp-gem: xilinx/zynqmp.dtsi, &gem0..3, PHY attached to drivers/phy/xilinx/phy-zynqmp.c which has no .set_mode(). 
Reviewed-by: Maxime Chevallier Signed-off-by: Théo Lebrun Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251023-macb-eyeq5-v3-5-af509422c204@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb_main.c | 38 ++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 44188e7eee569..b1ed98d9c4380 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2965,6 +2965,10 @@ static int macb_open(struct net_device *dev) macb_init_hw(bp); + err = phy_set_mode_ext(bp->phy, PHY_MODE_ETHERNET, bp->phy_interface); + if (err) + goto reset_hw; + err = phy_power_on(bp->phy); if (err) goto reset_hw; @@ -5189,6 +5193,28 @@ static int init_reset_optional(struct platform_device *pdev) return ret; } +static int eyeq5_init(struct platform_device *pdev) +{ + struct net_device *netdev = platform_get_drvdata(pdev); + struct macb *bp = netdev_priv(netdev); + struct device *dev = &pdev->dev; + int ret; + + bp->phy = devm_phy_get(dev, NULL); + if (IS_ERR(bp->phy)) + return dev_err_probe(dev, PTR_ERR(bp->phy), + "failed to get PHY\n"); + + ret = phy_init(bp->phy); + if (ret) + return dev_err_probe(dev, ret, "failed to init PHY\n"); + + ret = macb_init(pdev); + if (ret) + phy_exit(bp->phy); + return ret; +} + static const struct macb_usrio_config sama7g5_usrio = { .mii = 0, .rmii = 1, @@ -5343,6 +5369,17 @@ static const struct macb_config versal_config = { .usrio = &macb_default_usrio, }; +static const struct macb_config eyeq5_config = { + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_QUEUE_DISABLE | + MACB_CAPS_NO_LSO, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = eyeq5_init, + .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, +}; + static const struct macb_config raspberrypi_rp1_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | 
MACB_CAPS_CLK_HW_CHG | MACB_CAPS_JUMBO | @@ -5374,6 +5411,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "microchip,mpfs-macb", .data = &mpfs_config }, { .compatible = "microchip,sama7g5-gem", .data = &sama7g5_gem_config }, { .compatible = "microchip,sama7g5-emac", .data = &sama7g5_emac_config }, + { .compatible = "mobileye,eyeq5-gem", .data = &eyeq5_config }, { .compatible = "raspberrypi,rp1-gem", .data = &raspberrypi_rp1_config }, { .compatible = "xlnx,zynqmp-gem", .data = &zynqmp_config}, { .compatible = "xlnx,zynq-gem", .data = &zynq_config }, From 792000fbcd0ca32f358c4e14eaa40a73690be24c Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 24 Oct 2025 09:07:17 +0200 Subject: [PATCH 337/867] net: stmmac: Move subsecond increment configuration in dedicated helper In preparation for fine/coarse support, let's move the subsecond increment and addend configuration in a dedicated helper. Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20251024070720.71174-2-maxime.chevallier@bootlin.com Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index fd51068801928..6706ae7dd89ff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -464,6 +464,33 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, } } +static void stmmac_update_subsecond_increment(struct stmmac_priv *priv) +{ + bool xmac = dwmac_is_xmac(priv->plat->core_type); + u32 sec_inc = 0; + u64 temp = 0; + + stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags); + + /* program Sub Second Increment reg */ + stmmac_config_sub_second_increment(priv, priv->ptpaddr, + priv->plat->clk_ptp_rate, + xmac, &sec_inc); + 
temp = div_u64(1000000000ULL, sec_inc); + + /* Store sub second increment for later use */ + priv->sub_second_inc = sec_inc; + + /* calculate default added value: + * formula is : + * addend = (2^32)/freq_div_ratio; + * where, freq_div_ratio = 1e9ns/sec_inc + */ + temp = (u64)(temp << 32); + priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); + stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); +} + /** * stmmac_hwtstamp_set - control hardware timestamping. * @dev: device pointer. @@ -697,10 +724,7 @@ static int stmmac_hwtstamp_get(struct net_device *dev, static int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) { - bool xmac = dwmac_is_xmac(priv->plat->core_type); struct timespec64 now; - u32 sec_inc = 0; - u64 temp = 0; if (!priv->plat->clk_ptp_rate) { netdev_err(priv->dev, "Invalid PTP clock rate"); @@ -710,23 +734,7 @@ static int stmmac_init_tstamp_counter(struct stmmac_priv *priv, stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; - /* program Sub Second Increment reg */ - stmmac_config_sub_second_increment(priv, priv->ptpaddr, - priv->plat->clk_ptp_rate, - xmac, &sec_inc); - temp = div_u64(1000000000ULL, sec_inc); - - /* Store sub second increment for later use */ - priv->sub_second_inc = sec_inc; - - /* calculate default added value: - * formula is : - * addend = (2^32)/freq_div_ratio; - * where, freq_div_ratio = 1e9ns/sec_inc - */ - temp = (u64)(temp << 32); - priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); - stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); + stmmac_update_subsecond_increment(priv); /* initialize system time */ ktime_get_real_ts64(&now); From 6920fa0c764dbdd35d311d4df986226bb48165f6 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 24 Oct 2025 09:07:18 +0200 Subject: [PATCH 338/867] net: stmmac: Add a devlink attribute to control timestamping mode The DWMAC1000 supports 2 timestamping configurations 
to configure how frequency adjustments are made to the ptp_clock, as well as the reported timestamp values. There was a previous attempt at upstreaming support for configuring this mode by Olivier Dautricourt and Julien Beraud a few years back [1] In a nutshell, the timestamping can be either set in fine mode or in coarse mode. In fine mode, which is the default, we use the overflow of an accumulator to trigger frequency adjustments, but by doing so we lose precision on the timestamps that are produced by the timestamping unit. The main drawback is that the sub-second increment value, used to generate timestamps, can't be set to lower than (2 / ptp_clock_freq). The "fine" qualification comes from the frequent frequency adjustments we are able to do, which is perfect for a PTP follower usecase. In Coarse mode, we don't do frequency adjustments based on an accumulator overflow. We can therefore have very fine subsecond increment values, allowing for better timestamping precision. However this mode works best when the ptp clock frequency is adjusted based on an external signal, such as a PPS input produced by a GPS clock. This mode is therefore perfect for a Grand-master usecase. Introduce a driver-specific devlink parameter "ts_coarse" to enable or disable coarse mode, keeping the "fine" mode as a default. This can then be changed with: devlink dev param set name ts_coarse value true cmode runtime The associated documentation is also added. 
[1] : https://lore.kernel.org/netdev/20200514102808.31163-1-olivier.dautricourt@orolia.com/ Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20251024070720.71174-3-maxime.chevallier@bootlin.com Signed-off-by: Paolo Abeni --- Documentation/networking/devlink/index.rst | 1 + Documentation/networking/devlink/stmmac.rst | 31 +++++ drivers/net/ethernet/stmicro/stmmac/Kconfig | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 115 +++++++++++++++++- 5 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 Documentation/networking/devlink/stmmac.rst diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index 0c58e5c729d92..35b12a2bfeba5 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -99,5 +99,6 @@ parameters, info versions, and other features it supports. prestera qed sfc + stmmac ti-cpsw-switch zl3073x diff --git a/Documentation/networking/devlink/stmmac.rst b/Documentation/networking/devlink/stmmac.rst new file mode 100644 index 0000000000000..e8e33d1c7baf1 --- /dev/null +++ b/Documentation/networking/devlink/stmmac.rst @@ -0,0 +1,31 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================= +stmmac (synopsys dwmac) devlink support +======================================= + +This document describes the devlink features implemented by the ``stmmac`` +device driver. + +Parameters +========== + +The ``stmmac`` driver implements the following driver-specific parameters. + +.. list-table:: Driver-specific parameters implemented + :widths: 5 5 5 85 + + * - Name + - Type + - Mode + - Description + * - ``ts_coarse`` + - Boolean + - runtime + - Enable the Coarse timestamping mode. 
In Coarse mode, the ptp clock is + expected to be updated through an external PPS input, but the subsecond + increment used for timestamping is set to 1/ptp_clock_rate. In Fine mode + (i.e. Coarse mode == false), the ptp clock frequency is adjusted more + frequently, but the subsecond increment is set to 2/ptp_clock_rate. + Coarse mode is suitable for PTP Grand Master operation. If unsure, leave + the parameter to False. diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 716daa51df7e5..87c5bea6c2a24 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -10,6 +10,7 @@ config STMMAC_ETH select PHYLINK select CRC32 select RESET_CONTROLLER + select NET_DEVLINK help This is the driver for the Ethernet IPs built around a Synopsys IP Core. diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index d5af9344dfb03..3ea680cc63d81 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -259,6 +259,7 @@ struct stmmac_priv { u32 sarc_type; u32 rx_riwt[MTL_MAX_RX_QUEUES]; int hwts_rx_en; + bool tsfupdt_coarse; void __iomem *ioaddr; struct net_device *dev; @@ -369,6 +370,8 @@ struct stmmac_priv { /* XDP BPF Program */ unsigned long *af_xdp_zc_qps; struct bpf_prog *xdp_prog; + + struct devlink *devlink; }; enum stmmac_state { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6706ae7dd89ff..ba4eeba14baaa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -58,8 +59,7 @@ * with fine resolution and binary rollover. This avoid non-monotonic behavior * (clock jumps) when changing timestamping settings at runtime. 
*/ -#define STMMAC_HWTS_ACTIVE (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \ - PTP_TCR_TSCTRLSSR) +#define STMMAC_HWTS_ACTIVE (PTP_TCR_TSENA | PTP_TCR_TSCTRLSSR) #define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) @@ -148,6 +148,15 @@ static void stmmac_exit_fs(struct net_device *dev); #define STMMAC_COAL_TIMER(x) (ns_to_ktime((x) * NSEC_PER_USEC)) +struct stmmac_devlink_priv { + struct stmmac_priv *stmmac_priv; +}; + +enum stmmac_dl_param_id { + STMMAC_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + STMMAC_DEVLINK_PARAM_ID_TS_COARSE, +}; + /** * stmmac_set_clk_tx_rate() - set the clock rate for the MAC transmit clock * @bsp_priv: BSP private data structure (unused) @@ -675,6 +684,8 @@ static int stmmac_hwtstamp_set(struct net_device *dev, priv->hwts_tx_en = config->tx_type == HWTSTAMP_TX_ON; priv->systime_flags = STMMAC_HWTS_ACTIVE; + if (!priv->tsfupdt_coarse) + priv->systime_flags |= PTP_TCR_TSCFUPDT; if (priv->hwts_tx_en || priv->hwts_rx_en) { priv->systime_flags |= tstamp_all | ptp_v2 | @@ -765,7 +776,8 @@ static int stmmac_init_timestamping(struct stmmac_priv *priv) return -EOPNOTSUPP; } - ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE | + PTP_TCR_TSCFUPDT); if (ret) { netdev_warn(priv->dev, "PTP init failed\n"); return ret; @@ -7400,6 +7412,95 @@ static const struct xdp_metadata_ops stmmac_xdp_metadata_ops = { .xmo_rx_timestamp = stmmac_xdp_rx_timestamp, }; +static int stmmac_dl_ts_coarse_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct stmmac_devlink_priv *dl_priv = devlink_priv(dl); + struct stmmac_priv *priv = dl_priv->stmmac_priv; + + priv->tsfupdt_coarse = ctx->val.vbool; + + if (priv->tsfupdt_coarse) + priv->systime_flags &= ~PTP_TCR_TSCFUPDT; + else + priv->systime_flags |= PTP_TCR_TSCFUPDT; + + /* In Coarse mode, we can use a smaller subsecond increment, let's + * 
reconfigure the systime, subsecond increment and addend. + */ + stmmac_update_subsecond_increment(priv); + + return 0; +} + +static int stmmac_dl_ts_coarse_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct stmmac_devlink_priv *dl_priv = devlink_priv(dl); + struct stmmac_priv *priv = dl_priv->stmmac_priv; + + ctx->val.vbool = priv->tsfupdt_coarse; + + return 0; +} + +static const struct devlink_param stmmac_devlink_params[] = { + DEVLINK_PARAM_DRIVER(STMMAC_DEVLINK_PARAM_ID_TS_COARSE, "ts_coarse", + DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + stmmac_dl_ts_coarse_get, + stmmac_dl_ts_coarse_set, NULL), +}; + +/* None of the generic devlink parameters are implemented */ +static const struct devlink_ops stmmac_devlink_ops = {}; + +static int stmmac_register_devlink(struct stmmac_priv *priv) +{ + struct stmmac_devlink_priv *dl_priv; + int ret; + + /* For now, what is exposed over devlink is only relevant when + * timestamping is available and we have a valid ptp clock rate + */ + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp) || + !priv->plat->clk_ptp_rate) + return 0; + + priv->devlink = devlink_alloc(&stmmac_devlink_ops, sizeof(*dl_priv), + priv->device); + if (!priv->devlink) + return -ENOMEM; + + dl_priv = devlink_priv(priv->devlink); + dl_priv->stmmac_priv = priv; + + ret = devlink_params_register(priv->devlink, stmmac_devlink_params, + ARRAY_SIZE(stmmac_devlink_params)); + if (ret) + goto dl_free; + + devlink_register(priv->devlink); + return 0; + +dl_free: + devlink_free(priv->devlink); + + return ret; +} + +static void stmmac_unregister_devlink(struct stmmac_priv *priv) +{ + if (!priv->devlink) + return; + + devlink_unregister(priv->devlink); + devlink_params_unregister(priv->devlink, stmmac_devlink_params, + ARRAY_SIZE(stmmac_devlink_params)); + devlink_free(priv->devlink); +} + /** * stmmac_dvr_probe * @device: device pointer @@ -7673,6 +7774,10 @@ int stmmac_dvr_probe(struct device *device, goto 
error_phy_setup; } + ret = stmmac_register_devlink(priv); + if (ret) + goto error_devlink_setup; + ret = register_netdev(ndev); if (ret) { dev_err(priv->device, "%s: ERROR %i registering the device\n", @@ -7695,6 +7800,8 @@ int stmmac_dvr_probe(struct device *device, return ret; error_netdev_register: + stmmac_unregister_devlink(priv); +error_devlink_setup: phylink_destroy(priv->phylink); error_phy_setup: stmmac_pcs_clean(ndev); @@ -7731,6 +7838,8 @@ void stmmac_dvr_remove(struct device *dev) #ifdef CONFIG_DEBUG_FS stmmac_exit_fs(ndev); #endif + stmmac_unregister_devlink(priv); + phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); From f0f7a3f542c1698edb69075f25a3f846207facba Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Tue, 28 Oct 2025 14:30:09 +0800 Subject: [PATCH 339/867] platform/x86: int3472: Fix double free of GPIO device during unregister MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regulator_unregister() already frees the associated GPIO device. On ThinkPad X9 (Lunar Lake), this causes a double free issue that leads to random failures when other drivers (typically Intel THC) attempt to allocate interrupts. The root cause is that the reference count of the pinctrl_intel_platform module unexpectedly drops to zero when this driver defers its probe. This behavior can also be reproduced by unloading the module directly. Fix the issue by removing the redundant release of the GPIO device during regulator unregistration. 
Cc: stable@vger.kernel.org Fixes: 1e5d088a52c2 ("platform/x86: int3472: Stop using devm_gpiod_get()") Signed-off-by: Qiu Wenbo Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Reviewed-by: Hans de Goede Reviewed-by: Daniel Scally Link: https://patch.msgid.link/20251028063009.289414-1-qiuwenbo@gnome.org Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/int3472/clk_and_regulator.c | 5 +---- include/linux/platform_data/x86/int3472.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index 476ec24d37020..9e052b164a1ab 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -245,15 +245,12 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, if (IS_ERR(regulator->rdev)) return PTR_ERR(regulator->rdev); - int3472->regulators[int3472->n_regulator_gpios].ena_gpio = gpio; int3472->n_regulator_gpios++; return 0; } void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472) { - for (int i = 0; i < int3472->n_regulator_gpios; i++) { + for (int i = 0; i < int3472->n_regulator_gpios; i++) regulator_unregister(int3472->regulators[i].rdev); - gpiod_put(int3472->regulators[i].ena_gpio); - } } diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 1571e9157fa50..b1b837583d544 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -100,7 +100,6 @@ struct int3472_gpio_regulator { struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; - struct gpio_desc *ena_gpio; struct regulator_dev *rdev; struct regulator_desc rdesc; }; From 7f7d28c69eda3692bcf102b7096b93fd45c75b1d Mon Sep 17 00:00:00 2001 From: 
Sakari Ailus Date: Tue, 28 Oct 2025 10:49:59 +0200 Subject: [PATCH 340/867] MAINTAINERS: Update int3472 maintainers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add myself as the maintainer of the int3472 driver. Also update Daniel's e-mail address while at it. Signed-off-by: Sakari Ailus Acked-by: Daniel Scally Link: https://patch.msgid.link/20251028084959.394795-1-sakari.ailus@linux.intel.com Signed-off-by: Ilpo Järvinen --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 832f3279ea83b..c2bf47675a03e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12856,7 +12856,8 @@ F: tools/testing/selftests/sgx/* K: \bSGX_ INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER -M: Daniel Scally +M: Daniel Scally +M: Sakari Ailus S: Maintained F: drivers/platform/x86/intel/int3472/ F: include/linux/platform_data/x86/int3472.h From 8f3eaad9812f62e7006ad08602444b32c3101824 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Oct 2025 17:23:30 +0200 Subject: [PATCH 341/867] Input: Add keycodes for electronic privacy screen on/off hotkeys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add keycodes for hotkeys toggling the electronic privacy screen found on some laptops on/off. There already is an API for eprivacy screens as kernel-mode-setting drm connector object properties: https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties this API also supports reporting when the eprivacy screen is turned on/off by the embedded-controller (EC) in response to hotkey presses. But on some laptops (e.g. the Dell Latitude 7300) the firmware does not allow querying the presence nor the status of the eprivacy screen at boot. This makes it impossible to implement the drm connector properties API since drm objects do not allow adding new properties after creation and the presence of the eprivacy cannot be detected at boot. 
The first notice of the presence of an eprivacy screen on these laptops is an EC generated (WMI) event when the eprivacy screen hotkeys are pressed. In this case the new keycodes this change adds can be generated to notify userspace of the eprivacy screen on/off hotkeys being pressed, so that userspace can show the usual on-screen-display (OSD) notification for eprivacy screen on/off to the user. This is similar to how e.g. touchpad on/off keycodes are used to show the touchpad on/off OSD. Signed-off-by: Hans de Goede Acked-by: Dmitry Torokhov Link: https://patch.msgid.link/20251020152331.52870-2-hansg@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/uapi/linux/input-event-codes.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4a9fbf42aa9fa..9cd89bcc1d9c0 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -631,6 +631,18 @@ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ +/* + * Keycodes for hotkeys toggling the electronic privacy screen found on some + * laptops on/off. Note when the embedded-controller turns on/off the eprivacy + * screen itself then the state should be reported through drm connector props: + * https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties + * Except when implementing the drm connector properties API is not possible + * because e.g. the firmware does not allow querying the presence and/or status + * of the eprivacy screen at boot.
+ */ +#define KEY_EPRIVACY_SCREEN_ON 0x252 +#define KEY_EPRIVACY_SCREEN_OFF 0x253 + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 From 4173edb076b3ae30d734d55fce0ebac63139b656 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Oct 2025 17:23:31 +0200 Subject: [PATCH 342/867] platform/x86: dell-wmi-base: Handle electronic privacy screen on/off events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add handling for events for the electronic privacy screen found on some models (e.g. Dell Latitude 7300) being toggled on/off. Emit KEY_EPRIVACY_SCREEN_OFF / KEY_EPRIVACY_SCREEN_ON events for this so that userspace can show the usual on-screen-display (OSD) notification for eprivacy screen on/off to the user. Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20251020152331.52870-3-hansg@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-wmi-base.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 841a5414d28a6..28076929d6af5 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c @@ -365,6 +365,13 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* Backlight brightness change event */ { KE_IGNORE, 0x0003, { KEY_RESERVED } }, + /* + * Electronic privacy screen toggled, extended data gives state, + * separate entries for on/off see handling in dell_wmi_process_key(). 
+ */ + { KE_KEY, 0x000c, { KEY_EPRIVACY_SCREEN_OFF } }, + { KE_KEY, 0x000c, { KEY_EPRIVACY_SCREEN_ON } }, + /* Ultra-performance mode switch request */ { KE_IGNORE, 0x000d, { KEY_RESERVED } }, @@ -435,6 +442,11 @@ static int dell_wmi_process_key(struct wmi_device *wdev, int type, int code, u16 "Dell tablet mode switch", SW_TABLET_MODE, !buffer[0]); return 1; + } else if (type == 0x0012 && code == 0x000c && remaining > 0) { + /* Eprivacy toggle, switch to "on" key entry for on events */ + if (buffer[0] == 2) + key++; + used = 1; } else if (type == 0x0012 && code == 0x000d && remaining > 0) { value = (buffer[2] == 2); used = 1; From bd34bf518a5ffeb8eb7c8b9907ba97b606166f7b Mon Sep 17 00:00:00 2001 From: Lazar Aleksic Date: Tue, 28 Oct 2025 19:09:05 +0100 Subject: [PATCH 343/867] platform: x86: Kconfig: fix minor typo in help for WIRELESS_HOTKEY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed a misspelling of Xiaomi. Signed-off-by: Lazar Aleksic Link: https://patch.msgid.link/20251028180956.10753-1-kripticni.dev@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 46e62feeda3c9..c122016d82f19 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -432,7 +432,7 @@ config WIRELESS_HOTKEY depends on INPUT help This driver provides supports for the wireless buttons found on some AMD, - HP, & Xioami laptops. + HP, & Xiaomi laptops. On such systems the driver should load automatically (via ACPI alias). 
To compile this driver as a module, choose M here: the module will From 09e2603513841c48e5422d132f57f8a4c7337721 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Oct 2025 12:29:57 -0700 Subject: [PATCH 344/867] tools: ynl: fix indent issues in the main Python lib Class NlError() and operation_do_attributes() are indented by 2 spaces rather than 4 spaces used by the rest of the file. Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20251027192958.2058340-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 62383c70ebb95..bdcc4f031d394 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -100,12 +100,12 @@ class Netlink: 'bitfield32', 'sint', 'uint']) class NlError(Exception): - def __init__(self, nl_msg): - self.nl_msg = nl_msg - self.error = -nl_msg.error + def __init__(self, nl_msg): + self.nl_msg = nl_msg + self.error = -nl_msg.error - def __str__(self): - return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}" + def __str__(self): + return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}" class ConfigError(Exception): @@ -1039,15 +1039,15 @@ def poll_ntf(self, duration=None): self.check_ntf() def operation_do_attributes(self, name): - """ - For a given operation name, find and return a supported - set of attributes (as a dict). - """ - op = self.find_operation(name) - if not op: - return None - - return op['do']['request']['attributes'].copy() + """ + For a given operation name, find and return a supported + set of attributes (as a dict). 
+ """ + op = self.find_operation(name) + if not op: + return None + + return op['do']['request']['attributes'].copy() def _encode_message(self, op, vals, flags, req_seq): nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK From 34164142b5fd6878cd487f531ae074e3227031ac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Oct 2025 12:29:58 -0700 Subject: [PATCH 345/867] tools: ynl: rework the string representation of NlError In early days of YNL development dumping the NlMsg on errors was quite useful, as the library itself could have been buggy. These days increasingly the NlMsg is just taking up screen space and means nothing to a typical user. Try to format the errors more in line with how YNL C formats its errors strings. Before: $ ynl --family ethtool --do channels-set --json '{}' Netlink error: Invalid argument nl_len = 44 (28) nl_flags = 0x300 nl_type = 2 error: -22 extack: {'miss-type': 'header'} $ ynl --family ethtool --do channels-set --json '{..., "tx-count": 999}' Netlink error: Invalid argument nl_len = 88 (72) nl_flags = 0x300 nl_type = 2 error: -22 extack: {'msg': 'requested channel count exceeds maximum', 'bad-attr': '.tx-count'} After: $ ynl --family ethtool --do channels-set --json '{}' Netlink error: Invalid argument {'miss-type': 'header'} $ ynl --family ethtool --do channels-set --json '{..., "tx-count": 999}' Netlink error: requested channel count exceeds maximum: Invalid argument {'bad-attr': '.tx-count'} Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20251027192958.2058340-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index bdcc4f031d394..225baad3c8f8b 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -105,7 +105,16 @@ def __init__(self, nl_msg): self.error = -nl_msg.error def __str__(self): - return f"Netlink 
error: {os.strerror(self.error)}\n{self.nl_msg}" + msg = "Netlink error: " + + extack = self.nl_msg.extack.copy() if self.nl_msg.extack else {} + if 'msg' in extack: + msg += extack['msg'] + ': ' + del extack['msg'] + msg += os.strerror(self.error) + if extack: + msg += ' ' + str(extack) + return msg class ConfigError(Exception): From a086e9860ce6a751acd71dbec54d24a819dd6baa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Oct 2025 09:05:17 +0000 Subject: [PATCH 346/867] net: optimize enqueue_to_backlog() for the fast path Add likely() and unlikely() clauses for the common cases: Device is running. Queue is not full. Queue is less than half capacity. Add max_backlog parameter to skb_flow_limit() to avoid a second READ_ONCE(net_hotdata.max_backlog). skb_flow_limit() does not need the backlog_lock protection, and can be called before we acquire the lock, for even better resistance to attacks. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251024090517.3289181-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 378c2d010faf2..d32f0b0c03bbd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5249,14 +5249,15 @@ void kick_defer_list_purge(unsigned int cpu) int netdev_flow_limit_table_len __read_mostly = (1 << 12); #endif -static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) +static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen, + int max_backlog) { #ifdef CONFIG_NET_FLOW_LIMIT - struct sd_flow_limit *fl; - struct softnet_data *sd; unsigned int old_flow, new_flow; + const struct softnet_data *sd; + struct sd_flow_limit *fl; - if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> 1)) + if (likely(qlen < (max_backlog >> 1))) return false; sd = this_cpu_ptr(&softnet_data); @@ -5301,19 +5302,19 @@ static int 
enqueue_to_backlog(struct sk_buff *skb, int cpu, u32 tail; reason = SKB_DROP_REASON_DEV_READY; - if (!netif_running(skb->dev)) + if (unlikely(!netif_running(skb->dev))) goto bad_dev; - reason = SKB_DROP_REASON_CPU_BACKLOG; sd = &per_cpu(softnet_data, cpu); qlen = skb_queue_len_lockless(&sd->input_pkt_queue); max_backlog = READ_ONCE(net_hotdata.max_backlog); - if (unlikely(qlen > max_backlog)) + if (unlikely(qlen > max_backlog) || + skb_flow_limit(skb, qlen, max_backlog)) goto cpu_backlog_drop; backlog_lock_irq_save(sd, &flags); qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) { + if (likely(qlen <= max_backlog)) { if (!qlen) { /* Schedule NAPI for backlog device. We can use * non atomic operation as we own the queue lock. @@ -5334,6 +5335,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, backlog_unlock_irq_restore(sd, &flags); cpu_backlog_drop: + reason = SKB_DROP_REASON_CPU_BACKLOG; numa_drop_add(&sd->drop_counters, 1); bad_dev: dev_core_stats_rx_dropped_inc(skb->dev); From c72568c21b97dbc48d02b769f4eec6667ad13d5a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Oct 2025 09:12:40 +0000 Subject: [PATCH 347/867] net: rps: softnet_data reorg to make enqueue_to_backlog() fast enqueue_to_backlog() is showing up in kernel profiles on hosts with many cores, when RFS/RPS is used. 
The following softnet_data fields need to be updated: - input_queue_tail - input_pkt_queue (next, prev, qlen, lock) - backlog.state (if input_pkt_queue was empty) Unfortunately they are currently using two cache lines: /* --- cacheline 3 boundary (192 bytes) --- */ call_single_data_t csd __attribute__((__aligned__(64))); /* 0xc0 0x20 */ struct softnet_data * rps_ipi_next; /* 0xe0 0x8 */ unsigned int cpu; /* 0xe8 0x4 */ unsigned int input_queue_tail; /* 0xec 0x4 */ struct sk_buff_head input_pkt_queue; /* 0xf0 0x18 */ /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */ struct napi_struct backlog __attribute__((__aligned__(8))); /* 0x108 0x1f0 */ Add one ____cacheline_aligned_in_smp to make sure they now are using a single cache line. Also, because napi_struct has written fields, make @state its first field. We want to make sure that cpus adding packets to sd->input_pkt_queue are not slowing down cpus processing their backlog because of false sharing. After this patch new layout is: /* --- cacheline 5 boundary (320 bytes) --- */ long int pad[3] __attribute__((__aligned__(64))); /* 0x140 0x18 */ unsigned int input_queue_tail; /* 0x158 0x4 */ /* XXX 4 bytes hole, try to pack */ struct sk_buff_head input_pkt_queue; /* 0x160 0x18 */ struct napi_struct backlog __attribute__((__aligned__(8))); /* 0x178 0x1f0 */ Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251024091240.3292546-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7f5aad5cc9a19..9c1e5042c5e76 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -377,6 +377,8 @@ struct napi_config { * Structure for NAPI scheduling similar to tasklet but with weighting */ struct napi_struct { + /* This field should be first or softnet_data.backlog needs tweaks.
*/ + unsigned long state; /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct @@ -385,7 +387,6 @@ struct napi_struct { */ struct list_head poll_list; - unsigned long state; int weight; u32 defer_hard_irqs_count; int (*poll)(struct napi_struct *, int); @@ -3529,9 +3530,17 @@ struct softnet_data { call_single_data_t csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; + + /* We force a cacheline alignment from here, to hold together + * input_queue_tail, input_pkt_queue and backlog.state. + * We add holes so that backlog.state is the last field + * of this cache line. + */ + long pad[3] ____cacheline_aligned_in_smp; unsigned int input_queue_tail; #endif struct sk_buff_head input_pkt_queue; + struct napi_struct backlog; struct numa_drop_counters drop_counters; From 8443c3160858b860bfc2db6a8397c72c9f6b513e Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Fri, 24 Oct 2025 11:02:56 -0700 Subject: [PATCH 348/867] net: netmem: remove NET_IOV_MAX from net_iov_type enum Remove the NET_IOV_MAX workaround from the net_iov_type enum. This entry was previously added to force the enum size to unsigned long to satisfy the NET_IOV_ASSERT_OFFSET static assertions. After commit f3d85c9ee510 ("netmem: introduce struct netmem_desc mirroring struct page") this approach became unnecessary by placing the net_iov_type after the netmem_desc. Placing the net_iov_type after netmem_desc results in the net_iov_type size having no effect on the position or layout of the fields that mirror the struct page. 
The layout before this patch: struct net_iov { union { struct netmem_desc desc; /* 0 48 */ struct { long unsigned int _flags; /* 0 8 */ long unsigned int pp_magic; /* 8 8 */ struct page_pool * pp; /* 16 8 */ long unsigned int _pp_mapping_pad; /* 24 8 */ long unsigned int dma_addr; /* 32 8 */ atomic_long_t pp_ref_count; /* 40 8 */ }; /* 0 48 */ }; /* 0 48 */ struct net_iov_area * owner; /* 48 8 */ enum net_iov_type type; /* 56 8 */ /* size: 64, cachelines: 1, members: 3 */ }; The layout after this patch: struct net_iov { union { struct netmem_desc desc; /* 0 48 */ struct { long unsigned int _flags; /* 0 8 */ long unsigned int pp_magic; /* 8 8 */ struct page_pool * pp; /* 16 8 */ long unsigned int _pp_mapping_pad; /* 24 8 */ long unsigned int dma_addr; /* 32 8 */ atomic_long_t pp_ref_count; /* 40 8 */ }; /* 0 48 */ }; /* 0 48 */ struct net_iov_area * owner; /* 48 8 */ enum net_iov_type type; /* 56 4 */ /* size: 64, cachelines: 1, members: 3 */ /* padding: 4 */ }; Signed-off-by: Bobby Eshleman Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20251024-b4-devmem-remove-niov-max-v1-1-ba72c68bc869@meta.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 651e2c62d1dde..9e10f4ac50c3d 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -68,10 +68,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); enum net_iov_type { NET_IOV_DMABUF, NET_IOV_IOURING, - - /* Force size to unsigned long to make the NET_IOV_ASSERTS below pass. - */ - NET_IOV_MAX = ULONG_MAX }; /* A memory descriptor representing abstract networking I/O vectors, From 294bfe0343da3b59db040c3a4dac05b4c91ce013 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 25 Oct 2025 09:40:59 +0200 Subject: [PATCH 349/867] sctp: Constify struct sctp_sched_ops 'struct sctp_sched_ops' is not modified in these drivers. 
Constifying this structure moves some data to a read-only section, so increases overall security, especially when the structure holds some function pointers. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 8019 568 0 8587 218b net/sctp/stream_sched_fc.o After: ===== text data bss dec hex filename 8275 312 0 8587 218b net/sctp/stream_sched_fc.o Signed-off-by: Christophe JAILLET Link: https://patch.msgid.link/dce03527eb7b7cc8a3c26d5cdac12bafe3350135.1761377890.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/sctp/stream_sched.h | 4 ++-- include/net/sctp/structs.h | 2 +- net/sctp/stream.c | 8 ++++---- net/sctp/stream_sched.c | 16 ++++++++-------- net/sctp/stream_sched_fc.c | 4 ++-- net/sctp/stream_sched_prio.c | 2 +- net/sctp/stream_sched_rr.c | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 8034bf5febbe8..77806ef1cb70c 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -52,10 +52,10 @@ void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch); void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); -struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); +const struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); void sctp_sched_ops_register(enum sctp_sched_type sched, - struct sctp_sched_ops *sched_ops); + const struct sctp_sched_ops *sched_ops); void sctp_sched_ops_prio_init(void); void sctp_sched_ops_rr_init(void); void sctp_sched_ops_fc_init(void); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3dd304e411d02..5900196d65fd1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1073,7 +1073,7 @@ struct sctp_outq { struct list_head out_chunk_list; /* Stream 
scheduler being used */ - struct sctp_sched_ops *sched; + const struct sctp_sched_ops *sched; unsigned int out_qlen; /* Total length of queued data chunks. */ diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f205556c5b248..0615e4426341c 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -54,7 +54,7 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) { - struct sctp_sched_ops *sched; + const struct sctp_sched_ops *sched; if (!SCTP_SO(stream, sid)->ext) return; @@ -130,7 +130,7 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { - struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i, ret = 0; gfp |= __GFP_NOWARN; @@ -182,7 +182,7 @@ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) void sctp_stream_free(struct sctp_stream *stream) { - struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; sched->unsched_all(stream); @@ -207,7 +207,7 @@ void sctp_stream_clear(struct sctp_stream *stream) void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) { - struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); sched->unsched_all(stream); sctp_stream_outq_migrate(stream, new, new->outcnt); diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 54afbe4fb0872..50f8b5240359e 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -91,7 +91,7 @@ static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream) { } -static struct sctp_sched_ops sctp_sched_fcfs = { +static const struct sctp_sched_ops sctp_sched_fcfs = { .set = 
sctp_sched_fcfs_set, .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, @@ -111,10 +111,10 @@ static void sctp_sched_ops_fcfs_init(void) /* API to other parts of the stack */ -static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; +static const struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; void sctp_sched_ops_register(enum sctp_sched_type sched, - struct sctp_sched_ops *sched_ops) + const struct sctp_sched_ops *sched_ops) { sctp_sched_ops[sched] = sched_ops; } @@ -130,7 +130,7 @@ void sctp_sched_ops_init(void) static void sctp_sched_free_sched(struct sctp_stream *stream) { - struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); struct sctp_stream_out_ext *soute; int i; @@ -148,9 +148,9 @@ static void sctp_sched_free_sched(struct sctp_stream *stream) int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) { - struct sctp_sched_ops *old = asoc->outqueue.sched; + const struct sctp_sched_ops *old = asoc->outqueue.sched; struct sctp_datamsg *msg = NULL; - struct sctp_sched_ops *n; + const struct sctp_sched_ops *n; struct sctp_chunk *ch; int i, ret = 0; @@ -263,14 +263,14 @@ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { - struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + const struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); struct sctp_stream_out_ext *ext = SCTP_SO(stream, sid)->ext; INIT_LIST_HEAD(&ext->outq); return sched->init_sid(stream, sid, gfp); } -struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) +const struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) { struct sctp_association *asoc; diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c index 4bd18a497a6dc..776c6de46c224 100644 --- a/net/sctp/stream_sched_fc.c +++ 
b/net/sctp/stream_sched_fc.c @@ -188,7 +188,7 @@ static void sctp_sched_fc_unsched_all(struct sctp_stream *stream) list_del_init(&soute->fc_list); } -static struct sctp_sched_ops sctp_sched_fc = { +static const struct sctp_sched_ops sctp_sched_fc = { .set = sctp_sched_fc_set, .get = sctp_sched_fc_get, .init = sctp_sched_fc_init, @@ -206,7 +206,7 @@ void sctp_sched_ops_fc_init(void) sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc); } -static struct sctp_sched_ops sctp_sched_wfq = { +static const struct sctp_sched_ops sctp_sched_wfq = { .set = sctp_sched_wfq_set, .get = sctp_sched_wfq_get, .init = sctp_sched_fc_init, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 4d4d9da331f4c..fb6c55e5615df 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -300,7 +300,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) sctp_sched_prio_unsched(soute); } -static struct sctp_sched_ops sctp_sched_prio = { +static const struct sctp_sched_ops sctp_sched_prio = { .set = sctp_sched_prio_set, .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index 1f235e7f643a7..9157b653f1961 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -171,7 +171,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) sctp_sched_rr_unsched(stream, soute); } -static struct sctp_sched_ops sctp_sched_rr = { +static const struct sctp_sched_ops sctp_sched_rr = { .set = sctp_sched_rr_set, .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, From 5c00da851c31edd58be06a628d6c57684fd0d6a3 Mon Sep 17 00:00:00 2001 From: Rakuram Eswaran Date: Sat, 25 Oct 2025 17:35:18 +0530 Subject: [PATCH 350/867] net: tcp_lp: fix kernel-doc warnings and update outdated reference links Fix kernel-doc warnings in tcp_lp.c by adding missing parameter descriptions for tcp_lp_cong_avoid() and tcp_lp_pkts_acked() when building with W=1. 
Also replace invalid URLs in the file header comment with the currently valid links to the TCP-LP paper and implementation page. No functional changes. Signed-off-by: Rakuram Eswaran Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251025-net_ipv4_tcp_lp_c-v1-1-058cc221499e@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_lp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 52fe17167460f..976b56644a8a7 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -23,9 +23,9 @@ * Original Author: * Aleksandar Kuzmanovic * Available from: - * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf + * https://users.cs.northwestern.edu/~akuzma/doc/TCP-LP-ToN.pdf * Original implementation for 2.4.19: - * http://www-ece.rice.edu/networks/TCP-LP/ + * https://users.cs.northwestern.edu/~akuzma/rice/TCP-LP/linux/tcp-lp-linux.htm * * 2.6.x module Authors: * Wong Hoi Sing, Edison @@ -113,6 +113,8 @@ static void tcp_lp_init(struct sock *sk) /** * tcp_lp_cong_avoid * @sk: socket to avoid congesting + * @ack: current ack sequence number + * @acked: number of ACKed packets * * Implementation of cong_avoid. * Will only call newReno CA when away from inference. @@ -261,6 +263,7 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) /** * tcp_lp_pkts_acked * @sk: socket requiring congestion avoidance calculations + * @sample: ACK sample containing timing and rate information * * Implementation of pkts_acked. * Deal with active drop under Early Congestion Indication. From 61958b33ef0bab1c1874c933cd3910f495526782 Mon Sep 17 00:00:00 2001 From: Issam Hamdi Date: Fri, 24 Oct 2025 11:49:00 +0200 Subject: [PATCH 351/867] net: phy: realtek: Add RTL8224 cable testing support The RTL8224 can detect open pairs and short types (in same pair or some other pair). The distance to this problem can be estimated. This is done for each of the 4 pairs separately. 
It is not meant to be run while there is an active link partner because this interferes with the active test pulses. Output with open 50 m cable: Pair A code Open Circuit, source: TDR Pair A, fault length: 51.79m, source: TDR Pair B code Open Circuit, source: TDR Pair B, fault length: 51.28m, source: TDR Pair C code Open Circuit, source: TDR Pair C, fault length: 50.46m, source: TDR Pair D code Open Circuit, source: TDR Pair D, fault length: 51.12m, source: TDR Terminated cable: Pair A code OK, source: TDR Pair B code OK, source: TDR Pair C code OK, source: TDR Pair D code OK, source: TDR Shorted cable (both short types are at roughly the same distance) Pair A code Short to another pair, source: TDR Pair A, fault length: 2.35m, source: TDR Pair B code Short to another pair, source: TDR Pair B, fault length: 2.15m, source: TDR Pair C code OK, source: TDR Pair D code Short within Pair, source: TDR Pair D, fault length: 1.94m, source: TDR Signed-off-by: Issam Hamdi Co-developed-by: Sven Eckelmann Signed-off-by: Sven Eckelmann Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251024-rtl8224-cable-test-v1-1-e3cda89ac98f@simonwunderlich.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 187 +++++++++++++++++++++++++ 1 file changed, 187 insertions(+) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 16a347084293e..1fd4b6cf5c1e1 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -8,6 +8,7 @@ * Copyright (c) 2004 Freescale Semiconductor, Inc. 
*/ #include +#include #include #include #include @@ -127,6 +128,27 @@ */ #define RTL822X_VND2_C22_REG(reg) (0xa400 + 2 * (reg)) +#define RTL8224_MII_RTCT 0x11 +#define RTL8224_MII_RTCT_ENABLE BIT(0) +#define RTL8224_MII_RTCT_PAIR_A BIT(4) +#define RTL8224_MII_RTCT_PAIR_B BIT(5) +#define RTL8224_MII_RTCT_PAIR_C BIT(6) +#define RTL8224_MII_RTCT_PAIR_D BIT(7) +#define RTL8224_MII_RTCT_DONE BIT(15) + +#define RTL8224_MII_SRAM_ADDR 0x1b +#define RTL8224_MII_SRAM_DATA 0x1c + +#define RTL8224_SRAM_RTCT_FAULT(pair) (0x8026 + (pair) * 4) +#define RTL8224_SRAM_RTCT_FAULT_BUSY BIT(0) +#define RTL8224_SRAM_RTCT_FAULT_OPEN BIT(3) +#define RTL8224_SRAM_RTCT_FAULT_SAME_SHORT BIT(4) +#define RTL8224_SRAM_RTCT_FAULT_OK BIT(5) +#define RTL8224_SRAM_RTCT_FAULT_DONE BIT(6) +#define RTL8224_SRAM_RTCT_FAULT_CROSS_SHORT BIT(7) + +#define RTL8224_SRAM_RTCT_LEN(pair) (0x8028 + (pair) * 4) + #define RTL8366RB_POWER_SAVE 0x15 #define RTL8366RB_POWER_SAVE_ON BIT(12) @@ -1453,6 +1475,168 @@ static int rtl822xb_c45_read_status(struct phy_device *phydev) return 0; } +static int rtl8224_cable_test_start(struct phy_device *phydev) +{ + u32 val; + int ret; + + /* disable auto-negotiation and force 1000/Full */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, + RTL822X_VND2_C22_REG(MII_BMCR), + BMCR_ANENABLE | BMCR_SPEED100 | BMCR_SPEED10, + BMCR_SPEED1000 | BMCR_FULLDPLX); + if (ret) + return ret; + + mdelay(500); + + /* trigger cable test */ + val = RTL8224_MII_RTCT_ENABLE; + val |= RTL8224_MII_RTCT_PAIR_A; + val |= RTL8224_MII_RTCT_PAIR_B; + val |= RTL8224_MII_RTCT_PAIR_C; + val |= RTL8224_MII_RTCT_PAIR_D; + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, + RTL822X_VND2_C22_REG(RTL8224_MII_RTCT), + RTL8224_MII_RTCT_DONE, val); +} + +static int rtl8224_sram_read(struct phy_device *phydev, u32 reg) +{ + int ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + RTL822X_VND2_C22_REG(RTL8224_MII_SRAM_ADDR), + reg); + if (ret) + return ret; + + return phy_read_mmd(phydev, MDIO_MMD_VEND2, + 
RTL822X_VND2_C22_REG(RTL8224_MII_SRAM_DATA)); +} + +static int rtl8224_pair_len_get(struct phy_device *phydev, u32 pair) +{ + int cable_len; + u32 reg_len; + int ret; + u32 cm; + + reg_len = RTL8224_SRAM_RTCT_LEN(pair); + + ret = rtl8224_sram_read(phydev, reg_len); + if (ret < 0) + return ret; + + cable_len = ret & 0xff00; + + ret = rtl8224_sram_read(phydev, reg_len + 1); + if (ret < 0) + return ret; + + cable_len |= (ret & 0xff00) >> 8; + + cable_len -= 620; + cable_len = max(cable_len, 0); + + cm = cable_len * 100 / 78; + + return cm; +} + +static int rtl8224_cable_test_result_trans(u32 result) +{ + if (!(result & RTL8224_SRAM_RTCT_FAULT_DONE)) + return -EBUSY; + + if (result & RTL8224_SRAM_RTCT_FAULT_OK) + return ETHTOOL_A_CABLE_RESULT_CODE_OK; + + if (result & RTL8224_SRAM_RTCT_FAULT_OPEN) + return ETHTOOL_A_CABLE_RESULT_CODE_OPEN; + + if (result & RTL8224_SRAM_RTCT_FAULT_SAME_SHORT) + return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT; + + if (result & RTL8224_SRAM_RTCT_FAULT_BUSY) + return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC; + + if (result & RTL8224_SRAM_RTCT_FAULT_CROSS_SHORT) + return ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT; + + return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC; +} + +static int rtl8224_cable_test_report_pair(struct phy_device *phydev, unsigned int pair) +{ + int fault_rslt; + int ret; + + ret = rtl8224_sram_read(phydev, RTL8224_SRAM_RTCT_FAULT(pair)); + if (ret < 0) + return ret; + + fault_rslt = rtl8224_cable_test_result_trans(ret); + if (fault_rslt < 0) + return 0; + + ret = ethnl_cable_test_result(phydev, pair, fault_rslt); + if (ret < 0) + return ret; + + switch (fault_rslt) { + case ETHTOOL_A_CABLE_RESULT_CODE_OPEN: + case ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT: + case ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT: + ret = rtl8224_pair_len_get(phydev, pair); + if (ret < 0) + return ret; + + return ethnl_cable_test_fault_length(phydev, pair, ret); + default: + return 0; + } +} + +static int rtl8224_cable_test_report(struct phy_device *phydev, bool 
*finished) +{ + unsigned int pair; + int ret; + + for (pair = ETHTOOL_A_CABLE_PAIR_A; pair <= ETHTOOL_A_CABLE_PAIR_D; pair++) { + ret = rtl8224_cable_test_report_pair(phydev, pair); + if (ret == -EBUSY) { + *finished = false; + return 0; + } + + if (ret < 0) + return ret; + } + + return 0; +} + +static int rtl8224_cable_test_get_status(struct phy_device *phydev, bool *finished) +{ + int ret; + + *finished = false; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, + RTL822X_VND2_C22_REG(RTL8224_MII_RTCT)); + if (ret < 0) + return ret; + + if (!(ret & RTL8224_MII_RTCT_DONE)) + return 0; + + *finished = true; + + return rtl8224_cable_test_report(phydev, finished); +} + static bool rtlgen_supports_2_5gbps(struct phy_device *phydev) { int val; @@ -1930,11 +2114,14 @@ static struct phy_driver realtek_drvs[] = { }, { PHY_ID_MATCH_EXACT(0x001ccad0), .name = "RTL8224 2.5Gbps PHY", + .flags = PHY_POLL_CABLE_TEST, .get_features = rtl822x_c45_get_features, .config_aneg = rtl822x_c45_config_aneg, .read_status = rtl822x_c45_read_status, .suspend = genphy_c45_pma_suspend, .resume = rtlgen_c45_resume, + .cable_test_start = rtl8224_cable_test_start, + .cable_test_get_status = rtl8224_cable_test_get_status, }, { PHY_ID_MATCH_EXACT(0x001cc961), .name = "RTL8366RB Gigabit Ethernet", From a8abe8e210c175b1d5a7e53df069e107b65c13cb Mon Sep 17 00:00:00 2001 From: Tianling Shen Date: Sun, 26 Oct 2025 21:36:52 +0800 Subject: [PATCH 352/867] net: phy: motorcomm: Add support for PHY LEDs on YT8531 The LED registers on YT8531 are exactly same as YT8521, so simply reuse yt8521_led_hw_* functions. Tested on OrangePi R1 Plus LTS and Zero3. 
Signed-off-by: Tianling Shen Reviewed-by: Andrew Lunn Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20251026133652.1288732-1-cnsztl@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/motorcomm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c index a3593e6630594..89b5b19a9bd28 100644 --- a/drivers/net/phy/motorcomm.c +++ b/drivers/net/phy/motorcomm.c @@ -3048,6 +3048,9 @@ static struct phy_driver motorcomm_phy_drvs[] = { .get_wol = ytphy_get_wol, .set_wol = yt8531_set_wol, .link_change_notify = yt8531_link_change_notify, + .led_hw_is_supported = yt8521_led_hw_is_supported, + .led_hw_control_set = yt8521_led_hw_control_set, + .led_hw_control_get = yt8521_led_hw_control_get, }, { PHY_ID_MATCH_EXACT(PHY_ID_YT8531S), From 9222582ec524707fbb9d076febead5b6a07611ed Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 28 Oct 2025 14:07:44 +0800 Subject: [PATCH 353/867] Revert "wifi: ath12k: Fix missing station power save configuration" This reverts commit 4b66d18918f8e4d85e51974a9e3ce9abad5c7c3d. In [1], Ross Brown reports poor performance of WCN7850 after enabling power save. Temporarily revert the fix; it will be re-enabled once the issue is resolved. 
Tested-on: WCN7850 hw2.0 PCI WLAN.IOE_HMT.1.1-00011-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Fixes: 4b66d18918f8 ("wifi: ath12k: Fix missing station power save configuration") Reported-by: Ross Brown Closes: https://lore.kernel.org/all/CAMn66qZENLhDOcVJuwUZ3ir89PVtVnQRq9DkV5xjJn1p6BKB9w@mail.gmail.com/ # [1] Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251028060744.897198-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 122 ++++++++++++-------------- 1 file changed, 55 insertions(+), 67 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index eacab798630aa..db351c9220181 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4064,68 +4064,12 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, return ret; } -static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) -{ - struct ath12k *ar = arvif->ar; - struct ieee80211_vif *vif = arvif->ahvif->vif; - struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; - enum wmi_sta_powersave_param param; - struct ieee80211_bss_conf *info; - enum wmi_sta_ps_mode psmode; - int ret; - int timeout; - bool enable_ps; - - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - - if (vif->type != NL80211_IFTYPE_STATION) - return; - - enable_ps = arvif->ahvif->ps; - if (enable_ps) { - psmode = WMI_STA_PS_MODE_ENABLED; - param = WMI_STA_PS_PARAM_INACTIVITY_TIME; - - timeout = conf->dynamic_ps_timeout; - if (timeout == 0) { - info = ath12k_mac_get_link_bss_conf(arvif); - if (!info) { - ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", - vif->addr, arvif->link_id); - return; - } - - /* firmware doesn't like 0 */ - timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; - } - - ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, - timeout); - if (ret) { - ath12k_warn(ar->ab, "failed 
to set inactivity time for vdev %d: %i\n", - arvif->vdev_id, ret); - return; - } - } else { - psmode = WMI_STA_PS_MODE_DISABLED; - } - - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", - arvif->vdev_id, psmode ? "enable" : "disable"); - - ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); - if (ret) - ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", - psmode, arvif->vdev_id, ret); -} - static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 changed) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); unsigned long links = ahvif->links_map; - struct ieee80211_vif_cfg *vif_cfg; struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -4189,24 +4133,61 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, } } } +} - if (changed & BSS_CHANGED_PS) { - links = ahvif->links_map; - vif_cfg = &vif->cfg; +static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) +{ + struct ath12k *ar = arvif->ar; + struct ieee80211_vif *vif = arvif->ahvif->vif; + struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; + enum wmi_sta_powersave_param param; + struct ieee80211_bss_conf *info; + enum wmi_sta_ps_mode psmode; + int ret; + int timeout; + bool enable_ps; - for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { - arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - if (!arvif || !arvif->ar) - continue; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - ar = arvif->ar; + if (vif->type != NL80211_IFTYPE_STATION) + return; + + enable_ps = arvif->ahvif->ps; + if (enable_ps) { + psmode = WMI_STA_PS_MODE_ENABLED; + param = WMI_STA_PS_PARAM_INACTIVITY_TIME; - if (ar->ab->hw_params->supports_sta_ps) { - ahvif->ps = vif_cfg->ps; - ath12k_mac_vif_setup_ps(arvif); + timeout = conf->dynamic_ps_timeout; + if (timeout == 0) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, 
"unable to access bss link conf in setup ps for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; } + + /* firmware doesn't like 0 */ + timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; } + + ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, + timeout); + if (ret) { + ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", + arvif->vdev_id, ret); + return; + } + } else { + psmode = WMI_STA_PS_MODE_DISABLED; } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", + arvif->vdev_id, psmode ? "enable" : "disable"); + + ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); + if (ret) + ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", + psmode, arvif->vdev_id, ret); } static bool ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif, @@ -4228,6 +4209,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ieee80211_vif_cfg *vif_cfg = &vif->cfg; struct cfg80211_chan_def def; u32 param_id, param_value; enum nl80211_band band; @@ -4514,6 +4496,12 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, } ath12k_mac_fils_discovery(arvif, info); + + if (changed & BSS_CHANGED_PS && + ar->ab->hw_params->supports_sta_ps) { + ahvif->ps = vif_cfg->ps; + ath12k_mac_vif_setup_ps(arvif); + } } static struct ath12k_vif_cache *ath12k_ahvif_get_link_cache(struct ath12k_vif *ahvif, From 9e314a3c525c91c1e918546f472585cdbd44fbd1 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Thu, 25 Sep 2025 11:22:51 +0200 Subject: [PATCH 354/867] ice: remove legacy Rx and construct SKB The commit 53844673d555 ("iavf: kill 'legacy-rx' for good") removed the legacy Rx path in the iavf driver. This change applies the same rationale to the ice driver. 
The legacy Rx path relied on manual skb allocation and header copying, which has become increasingly inefficient and difficult to maintain. With the stabilization of build_skb() and the growing adoption of features like XDP, page_pool, and multi-buffer support, the legacy approach is no longer viable. Key drawbacks of the legacy path included: - Higher memory pressure due to direct page allocations and splitting; - Redundant memcpy() operations for packet headers; - CPU overhead from eth_get_headlen() and Flow Dissector usage; - Compatibility issues with XDP, which imposes strict headroom and tailroom requirements. The ice driver, like iavf, does not benefit from the minimal headroom savings that legacy Rx once offered, as it already splits pages into fixed halves. Removing this path simplifies the Rx logic, eliminates unnecessary branches in the hotpath, and prepares the driver for upcoming enhancements. In addition to removing the legacy Rx path, this change also eliminates the custom construct_skb() functions from both the standard and zero-copy (ZC) Rx paths. These are replaced with the build_skb() and standardized xdp_build_skb_from_zc() helpers, aligning the driver with the modern XDP infrastructure and reducing code duplication. This cleanup also reduces code complexity and improves maintainability as we move toward a more unified and modern Rx model across drivers. 
Co-developed-by: Alexander Lobakin Signed-off-by: Alexander Lobakin Reviewed-by: Alexander Lobakin Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 - drivers/net/ethernet/intel/ice/ice_base.c | 23 +----- drivers/net/ethernet/intel/ice/ice_ethtool.c | 5 -- drivers/net/ethernet/intel/ice/ice_main.c | 11 +-- drivers/net/ethernet/intel/ice/ice_txrx.c | 86 +------------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 16 ---- drivers/net/ethernet/intel/ice/ice_xsk.c | 72 +--------------- 7 files changed, 6 insertions(+), 208 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 9ee596773f34e..28de4273c2e82 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -509,7 +509,6 @@ enum ice_pf_flags { ICE_FLAG_MOD_POWER_UNSUPPORTED, ICE_FLAG_PHY_FW_LOAD_FAILED, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ - ICE_FLAG_LEGACY_RX, ICE_FLAG_VF_TRUE_PROMISC_ENA, ICE_FLAG_MDD_AUTO_RESET_VF, ICE_FLAG_VF_VLAN_PRUNING, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 2d35a278c555c..b3eb9f5125003 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -461,19 +461,6 @@ u16 ice_calc_ts_ring_count(struct ice_tx_ring *tx_ring) return tx_ring->count + max_fetch_desc; } -/** - * ice_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. 
- */ -static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring) -{ - if (ice_ring_uses_build_skb(rx_ring)) - return ICE_SKB_PAD; - return 0; -} - /** * ice_setup_rx_ctx - Configure a receive ring context * @ring: The Rx ring to configure @@ -586,13 +573,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) if (vsi->type == ICE_VSI_VF) return 0; - /* configure Rx buffer alignment */ - if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - ice_clear_ring_build_skb_ena(ring); - else - ice_set_ring_build_skb_ena(ring); - - ring->rx_offset = ice_rx_offset(ring); + ring->rx_offset = ICE_SKB_PAD; /* init queue specific tail register */ ring->tail = hw->hw_addr + QRX_TAIL(pf_q); @@ -753,7 +734,7 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) */ static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi, struct ice_rx_ring *ring) { - if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { + if (!vsi->netdev) { ring->max_frame = ICE_MAX_FRAME_LEGACY_RX; ring->rx_buf_len = ICE_RXBUF_1664; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index cb34d4675a788..240e3f35c10ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -340,7 +340,6 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_FLAG_VF_TRUE_PROMISC_ENA), ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF), ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING), - ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@ -1856,10 +1855,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) ice_nway_reset(netdev); } } - if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { - /* down and up VSI so that changes of Rx cfg are reflected. 
*/ - ice_down_up(vsi); - } /* don't allow modification of this flag when a single VF is in * promiscuous mode because it's not supported */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1de3da7b3907d..36e6a078c84c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2957,10 +2957,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) */ static int ice_max_xdp_frame_size(struct ice_vsi *vsi) { - if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - return ICE_RXBUF_1664; - else - return ICE_RXBUF_3072; + return ICE_RXBUF_3072; } /** @@ -7864,12 +7861,6 @@ int ice_change_mtu(struct net_device *netdev, int new_mtu) frame_size - ICE_ETH_PKT_HDR_PAD); return -EINVAL; } - } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) { - if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) { - netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n", - ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD); - return -EINVAL; - } } /* if a reset is in progress, wait for some time for it to complete */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 73f08d02f9c76..5d59ee45d3da4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1169,87 +1169,6 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) return skb; } -/** - * ice_construct_skb - Allocate skb and populate it - * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: xdp_buff pointing to the data - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. 
- */ -static struct sk_buff * -ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) -{ - unsigned int size = xdp->data_end - xdp->data; - struct skb_shared_info *sinfo = NULL; - struct ice_rx_buf *rx_buf; - unsigned int nr_frags = 0; - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - if (unlikely(xdp_buff_has_frags(xdp))) { - sinfo = xdp_get_shared_info_from_buff(xdp); - nr_frags = sinfo->nr_frags; - } - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - rx_buf = &rx_ring->rx_buf[rx_ring->first_desc]; - skb_record_rx_queue(skb, rx_ring->q_index); - /* Determine available headroom for copy */ - headlen = size; - if (headlen > ICE_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, - sizeof(long))); - - /* if we exhaust the linear part then add what is left as a frag */ - size -= headlen; - if (size) { - /* besides adding here a partial frag, we are going to add - * frags from xdp_buff, make sure there is enough space for - * them - */ - if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) { - dev_kfree_skb(skb); - return NULL; - } - skb_add_rx_frag(skb, 0, rx_buf->page, - rx_buf->page_offset + headlen, size, - xdp->frame_sz); - } else { - /* buffer is unused, restore biased page count in Rx buffer; - * data was copied onto skb's linear part so there's no - * need for adjusting page offset and we can reuse this buffer - * as-is - */ - rx_buf->pagecnt_bias++; - } - - if (unlikely(xdp_buff_has_frags(xdp))) { - struct skb_shared_info *skinfo = skb_shinfo(skb); - - memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], - sizeof(skb_frag_t) * nr_frags); - - xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags, - 
sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_get_skb_flags(xdp)); - } - - return skb; -} - /** * ice_put_rx_buf - Clean up used buffer and either recycle or free * @rx_ring: Rx descriptor ring to transact packets on @@ -1464,10 +1383,7 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) continue; construct_skb: - if (likely(ice_ring_uses_build_skb(rx_ring))) - skb = ice_build_skb(rx_ring, xdp); - else - skb = ice_construct_skb(rx_ring, xdp); + skb = ice_build_skb(rx_ring, xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_buf_failed++; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 841a07bfba54f..427f672fe0538 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -373,7 +373,6 @@ struct ice_rx_ring { dma_addr_t dma; /* physical address of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; -#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) #define ICE_RX_FLAGS_MULTIDEV BIT(3) #define ICE_RX_FLAGS_RING_GCS BIT(4) @@ -422,21 +421,6 @@ struct ice_tx_ring { u16 quanta_prof_id; } ____cacheline_internodealigned_in_smp; -static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) -{ - return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB); -} - -static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring) -{ - ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB; -} - -static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring) -{ - ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; -} - static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring) { return !!ring->ch; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 575fd48f485f1..b25bc5ba40abf 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -392,69 +392,6 @@ 
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover); } -/** - * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer - * @rx_ring: Rx ring - * @xdp: Pointer to XDP buffer - * - * This function allocates a new skb from a zero-copy Rx buffer. - * - * Returns the skb on success, NULL on failure. - */ -static struct sk_buff * -ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) -{ - unsigned int totalsize = xdp->data_end - xdp->data_meta; - unsigned int metasize = xdp->data - xdp->data_meta; - struct skb_shared_info *sinfo = NULL; - struct sk_buff *skb; - u32 nr_frags = 0; - - if (unlikely(xdp_buff_has_frags(xdp))) { - sinfo = xdp_get_shared_info_from_buff(xdp); - nr_frags = sinfo->nr_frags; - } - net_prefetch(xdp->data_meta); - - skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); - if (unlikely(!skb)) - return NULL; - - memcpy(__skb_put(skb, totalsize), xdp->data_meta, - ALIGN(totalsize, sizeof(long))); - - if (metasize) { - skb_metadata_set(skb, metasize); - __skb_pull(skb, metasize); - } - - if (likely(!xdp_buff_has_frags(xdp))) - goto out; - - for (int i = 0; i < nr_frags; i++) { - struct skb_shared_info *skinfo = skb_shinfo(skb); - skb_frag_t *frag = &sinfo->frags[i]; - struct page *page; - void *addr; - - page = dev_alloc_page(); - if (!page) { - dev_kfree_skb(skb); - return NULL; - } - addr = page_to_virt(page); - - memcpy(addr, skb_frag_page(frag), skb_frag_size(frag)); - - __skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++, - addr, 0, skb_frag_size(frag)); - } - -out: - xsk_buff_free(xdp); - return skb; -} - /** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring @@ -757,20 +694,15 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, construct_skb: /* XDP_PASS path */ - skb = ice_construct_skb_zc(rx_ring, first); + skb = xdp_build_skb_from_zc(first); if (!skb) { + xsk_buff_free(first); 
rx_ring->ring_stats->rx_stats.alloc_buf_failed++; break; } first = NULL; rx_ring->first_desc = ntc; - - if (eth_skb_pad(skb)) { - skb = NULL; - continue; - } - total_rx_bytes += skb->len; total_rx_packets++; From 3a4f419f750946181e3d6a339a1ef1942c5b5685 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Thu, 25 Sep 2025 11:22:52 +0200 Subject: [PATCH 355/867] ice: drop page splitting and recycling As part of the transition toward Page Pool integration, remove the legacy page splitting and recycling logic from the ice driver. This mirrors the approach taken in commit 920d86f3c552 ("iavf: drop page splitting and recycling"). The previous model attempted to reuse partially consumed pages by splitting them and tracking their usage across descriptors. While this was once a memory optimization, it introduced significant complexity and overhead in the Rx path, including: - Manual refcount management and page reuse heuristics; - Per-descriptor buffer shuffling, which could involve moving dozens of `ice_rx_buf` structures per NAPI cycle; - Increased branching and cache pressure in the hotpath. This change simplifies the Rx logic by always allocating fresh pages and letting the networking stack handle their lifecycle. Although this may temporarily reduce performance (up to ~98% in some XDP cases), it greatly improves maintainability and paves the way for Page Pool, which will restore and exceed previous performance levels. The `ice_rx_buf` array is retained for now to minimize diffstat and ease future replacement with a shared buffer abstraction. 
Co-developed-by: Alexander Lobakin Signed-off-by: Alexander Lobakin Reviewed-by: Alexander Lobakin Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 2 + drivers/net/ethernet/intel/ice/ice_base.c | 29 ++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 139 ++----------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 72 ---------- drivers/net/ethernet/intel/ice/virt/queues.c | 5 +- 5 files changed, 24 insertions(+), 223 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 28de4273c2e82..3d4d8b88631b2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -374,6 +374,8 @@ struct ice_vsi { spinlock_t arfs_lock; /* protects aRFS hash table and filter state */ atomic_t *arfs_last_fltr_id; + u16 max_frame; + struct ice_aqc_vsi_props info; /* VSI properties */ struct ice_vsi_vlan_info vlan_info; /* vlan config to be restored */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index b3eb9f5125003..fee58f879d9e1 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -495,7 +495,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Receive Packet Data Buffer Size. * The Packet Data Buffer Size is defined in 128 byte units. 
*/ - rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len, + rlan_ctx.dbuf = DIV_ROUND_UP(ICE_RXBUF_3072, BIT_ULL(ICE_RLAN_CTX_DBUF_S)); /* use 32 byte descriptors */ @@ -536,8 +536,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Max packet size for this queue - must not be set to a larger value * than 5 x DBUF */ - rlan_ctx.rxmax = min_t(u32, ring->max_frame, - ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len); + rlan_ctx.rxmax = min_t(u32, vsi->max_frame, + ICE_MAX_CHAINED_RX_BUFS * ICE_RXBUF_3072); /* Rx queue threshold in units of 64 */ rlan_ctx.lrxqthresh = 1; @@ -608,7 +608,7 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) #if (PAGE_SIZE >= 8192) frame_sz = rx_ring->rx_buf_len; #else - frame_sz = ice_rx_pg_size(rx_ring) / 2; + frame_sz = PAGE_SIZE; #endif return frame_sz; @@ -624,6 +624,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); u32 num_bufs = ICE_RX_DESC_UNUSED(ring); + u32 rx_buf_len; int err; if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { @@ -631,7 +632,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + ICE_RXBUF_3072); if (err) return err; } @@ -640,12 +641,12 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); - ring->rx_buf_len = + rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + rx_buf_len); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -663,7 +664,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + ICE_RXBUF_3072); if (err) return err; } @@ -735,17 +736,9 @@ int 
ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi, struct ice_rx_ring *ring) { if (!vsi->netdev) { - ring->max_frame = ICE_MAX_FRAME_LEGACY_RX; - ring->rx_buf_len = ICE_RXBUF_1664; -#if (PAGE_SIZE < 8192) - } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && - (vsi->netdev->mtu <= ETH_DATA_LEN)) { - ring->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN; - ring->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN; -#endif + vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX; } else { - ring->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; - ring->rx_buf_len = ICE_RXBUF_3072; + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; } } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5d59ee45d3da4..2b46e4c8be868 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -543,13 +543,13 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) */ dma_sync_single_range_for_cpu(dev, rx_buf->dma, rx_buf->page_offset, - rx_ring->rx_buf_len, + ICE_RXBUF_3072, DMA_FROM_DEVICE); /* free resources associated with mapping */ - dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring), + dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); + __free_page(rx_buf->page); rx_buf->page = NULL; rx_buf->page_offset = 0; @@ -803,10 +803,6 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) struct page *page = bi->page; dma_addr_t dma; - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - /* alloc new page for storage */ page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); if (unlikely(!page)) { @@ -815,7 +811,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring), + dma = 
dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); /* if mapping failed free memory back to system since @@ -831,7 +827,6 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) bi->page = page; bi->page_offset = rx_ring->rx_offset; page_ref_add(page, USHRT_MAX - 1); - bi->pagecnt_bias = USHRT_MAX; return true; } @@ -902,7 +897,7 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - rx_ring->rx_buf_len, + ICE_RXBUF_3072, DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change @@ -931,69 +926,6 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) return !!cleaned_count; } -/** - * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse - * @rx_buf: Rx buffer to adjust - * @size: Size of adjustment - * - * Update the offset within page so that Rx buf will be ready to be reused. 
- * For systems with PAGE_SIZE < 8192 this function will flip the page offset - * so the second half of page assigned to Rx buffer will be used, otherwise - * the offset is moved by "size" bytes - */ -static void -ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) -{ -#if (PAGE_SIZE < 8192) - /* flip page offset to other buffer */ - rx_buf->page_offset ^= size; -#else - /* move offset up to the next cache line */ - rx_buf->page_offset += size; -#endif -} - -/** - * ice_can_reuse_rx_page - Determine if page can be reused for another Rx - * @rx_buf: buffer containing the page - * - * If page is reusable, we have a green light for calling ice_reuse_rx_page, - * which will assign the current buffer to the buffer that next_to_alloc is - * pointing to; otherwise, the DMA mapping needs to be destroyed and - * page freed - */ -static bool -ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) -{ - unsigned int pagecnt_bias = rx_buf->pagecnt_bias; - struct page *page = rx_buf->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - - /* if we are only owner of page we can reuse it */ - if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) - return false; -#if (PAGE_SIZE >= 8192) -#define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) - if (rx_buf->page_offset > ICE_LAST_OFFSET) - return false; -#endif /* PAGE_SIZE >= 8192) */ - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. 
- */ - if (unlikely(pagecnt_bias == 1)) { - page_ref_add(page, USHRT_MAX - 1); - rx_buf->pagecnt_bias = USHRT_MAX; - } - - return true; -} - /** * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag * @rx_ring: Rx descriptor ring to transact packets on @@ -1032,35 +964,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, return 0; } -/** - * ice_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: Rx descriptor ring to store buffers on - * @old_buf: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - */ -static void -ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) -{ - u16 nta = rx_ring->next_to_alloc; - struct ice_rx_buf *new_buf; - - new_buf = &rx_ring->rx_buf[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* Transfer page from old buffer to new buffer. - * Move each member individually to avoid possible store - * forwarding stalls and unnecessary copy of skb. 
- */ - new_buf->dma = old_buf->dma; - new_buf->page = old_buf->page; - new_buf->page_offset = old_buf->page_offset; - new_buf->pagecnt_bias = old_buf->pagecnt_bias; -} - /** * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use * @rx_ring: Rx descriptor ring to transact packets on @@ -1086,9 +989,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, rx_buf->page_offset, size, DMA_FROM_DEVICE); - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buf->pagecnt_bias--; - return rx_buf; } @@ -1183,16 +1083,10 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) if (!rx_buf) return; - if (ice_can_reuse_rx_page(rx_buf)) { - /* hand second half of page back to the ring */ - ice_reuse_rx_page(rx_ring, rx_buf); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, - ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, - ICE_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); - } + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, + PAGE_SIZE, DMA_FROM_DEVICE, + ICE_RX_DMA_ATTR); /* clear contents of buffer_info */ rx_buf->page = NULL; @@ -1218,27 +1112,12 @@ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; struct ice_rx_buf *buf; - u32 xdp_frags = 0; - int i = 0; - - if (unlikely(xdp_buff_has_frags(xdp))) - xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; while (idx != ntc) { buf = &rx_ring->rx_buf[idx]; if (++idx == cnt) idx = 0; - /* An XDP program could release fragments from the end of the - * buffer. For these, we need to keep the pagecnt_bias as-is. - * To do this, only adjust pagecnt_bias for fragments up to - * the total remaining after the XDP program has run. 
- */ - if (verdict != ICE_XDP_CONSUMED) - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - else if (i++ <= xdp_frags) - buf->pagecnt_bias++; - ice_put_rx_buf(rx_ring, buf); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 427f672fe0538..3c7830f787de8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -27,71 +27,7 @@ #define ICE_MAX_TXQ_PER_TXQG 128 -/* Attempt to maximize the headroom available for incoming frames. We use a 2K - * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame. - * This leaves us with 512 bytes of room. From that we need to deduct the - * space needed for the shared info and the padding needed to IP align the - * frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define ICE_2K_TOO_SMALL_WITH_PADDING \ - ((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \ - SKB_WITH_OVERHEAD(ICE_RXBUF_2048)) - -/** - * ice_compute_pad - compute the padding - * @rx_buf_len: buffer length - * - * Figure out the size of half page based on given buffer length and - * then subtract the skb_shared_info followed by subtraction of the - * actual buffer length; this in turn results in the actual space that - * is left for padding usage - */ -static inline int ice_compute_pad(int rx_buf_len) -{ - int half_page_size; - - half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len; -} - -/** - * ice_skb_pad - determine the padding that we can supply - * - * Figure out the right Rx buffer size and based on that calculate the - * padding - */ -static inline int ice_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. 
- * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. - */ - if (ICE_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = ICE_RXBUF_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return ice_compute_pad(rx_buf_len); -} - -#define ICE_SKB_PAD ice_skb_pad() -#else -#define ICE_2K_TOO_SMALL_WITH_PADDING false #define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif /* We are assuming that the cache line is always 64 Bytes here for ice. * In order to make sure that is a correct assumption there is a check in probe @@ -202,7 +138,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int pagecnt_bias; }; struct ice_q_stats { @@ -368,7 +303,6 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; - u16 max_frame; u16 rx_buf_len; dma_addr_t dma; /* physical address of ring */ u8 dcb_tc; /* Traffic class of ring */ @@ -475,15 +409,9 @@ struct ice_coalesce_stored { static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring) { -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif return 0; } -#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring)) - union ice_32b_rx_flex_desc; void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 num_descs); diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c index 370f6ec2a374c..7928f4e8e788e 100644 --- a/drivers/net/ethernet/intel/ice/virt/queues.c +++ b/drivers/net/ethernet/intel/ice/virt/queues.c @@ -842,18 +842,17 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || qpi->rxq.databuffer_size < 1024)) goto error_param; - ring->rx_buf_len = 
qpi->rxq.databuffer_size; if (qpi->rxq.max_pkt_size > max_frame_size || qpi->rxq.max_pkt_size < 64) goto error_param; - ring->max_frame = qpi->rxq.max_pkt_size; + vsi->max_frame = qpi->rxq.max_pkt_size; /* add space for the port VLAN since the VF driver is * not expected to account for it in the MTU * calculation */ if (ice_vf_is_port_vlan_ena(vf)) - ring->max_frame += VLAN_HLEN; + vsi->max_frame += VLAN_HLEN; if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", From 93f53db9f9dc4a16b40ecd18e6d338ad57e4b670 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Thu, 25 Sep 2025 11:22:53 +0200 Subject: [PATCH 356/867] ice: switch to Page Pool This patch completes the transition of the ice driver to use the Page Pool and libeth APIs, following the same direction as commit 5fa4caff59f2 ("iavf: switch to Page Pool"). With the legacy page splitting and recycling logic already removed, the driver is now in a clean state to adopt the modern memory model. The Page Pool integration simplifies buffer management by offloading DMA mapping and recycling to the core infrastructure. This eliminates the need for driver-specific handling of headroom, buffer sizing, and page order. The libeth helper is used for CPU-side processing, while DMA-for-device is handled by the Page Pool core. Additionally, this patch extends the conversion to cover XDP support. The driver now uses libeth_xdp helpers for Rx buffer processing, and optimizes XDP_TX by skipping per-frame DMA mapping. Instead, all buffers are mapped as bi-directional up front, leveraging Page Pool's lifecycle management. This significantly reduces overhead in virtualized environments. Performance observations: - In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains on par with the previous implementation. 
- In XDP_TX mode: * With IOMMU enabled, performance improves dramatically - over 5x increase - due to reduced DMA mapping overhead and better memory reuse. * With IOMMU disabled, performance remains comparable to the previous implementation, with no significant changes observed. - In XDP_DROP mode: * For small MTUs (where multiple buffers can be allocated on a single memory page), a performance drop of approximately 20% is observed. According to 'perf top' analysis, the bottleneck is caused by atomic reference counter increments in the Page Pool. * For normal MTUs (where only one buffer can be allocated within a single memory page), performance remains comparable to baseline levels. This change is also a step toward a more modular and unified XDP implementation across Intel Ethernet drivers, aligning with ongoing efforts to consolidate and streamline feature support. Suggested-by: Maciej Fijalkowski Suggested-by: Alexander Lobakin Reviewed-by: Alexander Lobakin Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ice/ice_base.c | 91 ++-- drivers/net/ethernet/intel/ice/ice_ethtool.c | 17 +- drivers/net/ethernet/intel/ice/ice_lib.c | 1 - drivers/net/ethernet/intel/ice/ice_main.c | 10 +- drivers/net/ethernet/intel/ice/ice_txrx.c | 442 ++++-------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 37 +- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 65 ++- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 9 - drivers/net/ethernet/intel/ice/ice_xsk.c | 76 +-- drivers/net/ethernet/intel/ice/ice_xsk.h | 6 +- 11 files changed, 203 insertions(+), 552 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index a563a94e27802..7c6ee1e604aa6 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -296,6 +296,7 @@ config ICE depends on GNSS || GNSS = n select AUXILIARY_BUS
select DIMLIB + select LIBETH_XDP select LIBIE select LIBIE_ADMINQ select LIBIE_FWLOG diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index fee58f879d9e1..eabab50fab33d 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include +#include #include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" @@ -495,7 +496,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Receive Packet Data Buffer Size. * The Packet Data Buffer Size is defined in 128 byte units. */ - rlan_ctx.dbuf = DIV_ROUND_UP(ICE_RXBUF_3072, + rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(ICE_RLAN_CTX_DBUF_S)); /* use 32 byte descriptors */ @@ -537,7 +538,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) * than 5 x DBUF */ rlan_ctx.rxmax = min_t(u32, vsi->max_frame, - ICE_MAX_CHAINED_RX_BUFS * ICE_RXBUF_3072); + ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len); /* Rx queue threshold in units of 64 */ rlan_ctx.lrxqthresh = 1; @@ -573,8 +574,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) if (vsi->type == ICE_VSI_VF) return 0; - ring->rx_offset = ICE_SKB_PAD; - /* init queue specific tail register */ ring->tail = hw->hw_addr + QRX_TAIL(pf_q); writel(0, ring->tail); @@ -582,38 +581,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) return 0; } -static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) -{ - void *ctx_ptr = &ring->pkt_ctx; - struct xsk_cb_desc desc = {}; - - XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff); - desc.src = &ctx_ptr; - desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) - - sizeof(struct xdp_buff); - desc.bytes = sizeof(ctx_ptr); - xsk_pool_fill_cb(ring->xsk_pool, &desc); -} - -/** - * ice_get_frame_sz - calculate xdp_buff::frame_sz - * @rx_ring: the ring being configured - * - * Return frame size based on underlying PAGE_SIZE - */ -static unsigned int ice_get_frame_sz(struct 
ice_rx_ring *rx_ring) -{ - unsigned int frame_sz; - -#if (PAGE_SIZE >= 8192) - frame_sz = rx_ring->rx_buf_len; -#else - frame_sz = PAGE_SIZE; -#endif - - return frame_sz; -} - /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -622,8 +589,14 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) */ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { + struct libeth_fq fq = { + .count = ring->count, + .nid = NUMA_NO_NODE, + .xdp = ice_is_xdp_ena_vsi(ring->vsi), + .buf_len = LIBIE_MAX_RX_BUF_LEN, + }; struct device *dev = ice_pf_to_dev(ring->vsi->back); - u32 num_bufs = ICE_RX_DESC_UNUSED(ring); + u32 num_bufs = ICE_DESC_UNUSED(ring); u32 rx_buf_len; int err; @@ -632,12 +605,16 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ICE_RXBUF_3072); + ring->rx_buf_len); if (err) return err; } ice_rx_xsk_pool(ring); + err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool); + if (err) + return err; + if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); @@ -655,36 +632,38 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (err) return err; xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); - ice_xsk_pool_fill_cb(ring); dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { + err = libeth_rx_fq_create(&fq, &ring->q_vector->napi); + if (err) + return err; + + ring->pp = fq.pp; + ring->rx_fqes = fq.fqes; + ring->truesize = fq.truesize; + ring->rx_buf_len = fq.buf_len; + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - ICE_RXBUF_3072); + ring->rx_buf_len); if (err) - return err; + goto err_destroy_fq; } - - err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (err) - return err; + xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, + 
ring->pp); } } - xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; - ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); if (err) { dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n", ring->q_index, err); - return err; + goto err_destroy_fq; } if (ring->xsk_pool) { @@ -712,9 +691,19 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (ring->vsi->type == ICE_VSI_CTRL) ice_init_ctrl_rx_descs(ring, num_bufs); else - ice_alloc_rx_bufs(ring, num_bufs); + err = ice_alloc_rx_bufs(ring, num_bufs); + + if (err) + goto err_destroy_fq; return 0; + +err_destroy_fq: + libeth_rx_fq_destroy(&fq); + ring->rx_fqes = NULL; + ring->pp = NULL; + + return err; } int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 240e3f35c10ad..36fdac4fddc3c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -10,6 +10,7 @@ #include "ice_lib.h" #include "ice_dcb_lib.h" #include +#include struct ice_stats { char stat_string[ETH_GSTRING_LEN]; @@ -1230,8 +1231,9 @@ static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size) */ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) { - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_buf; int valid_frames, i; + struct page *page; u8 *received_buf; valid_frames = 0; @@ -1246,8 +1248,10 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; - rx_buf = &rx_ring->rx_buf[i]; - received_buf = page_address(rx_buf->page) + rx_buf->page_offset; + rx_buf = &rx_ring->rx_fqes[i]; + page = __netmem_to_page(rx_buf->netmem); + received_buf = page_address(page) + rx_buf->offset + + page->pp->p.offset; if (ice_lbtest_check_frame(received_buf)) valid_frames++; @@ -3303,7 +3307,8 @@ ice_set_ringparam(struct net_device *netdev, 
struct ethtool_ringparam *ring, rx_rings[i].count = new_rx_cnt; rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; - rx_rings[i].rx_buf = NULL; + rx_rings[i].xdp_buf = NULL; + /* this is to allow wr32 to have something to write to * during early allocation of Rx buffers */ @@ -3312,10 +3317,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, err = ice_setup_rx_ring(&rx_rings[i]); if (err) goto rx_unwind; - - /* allocate Rx buffers */ - err = ice_alloc_rx_bufs(&rx_rings[i], - ICE_RX_DESC_UNUSED(&rx_rings[i])); rx_unwind: if (err) { while (i) { diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 69cb0381c4609..15621707fbf81 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1427,7 +1427,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->reg_idx = vsi->rxq_map[i]; ring->vsi = vsi; ring->netdev = vsi->netdev; - ring->dev = dev; ring->count = vsi->num_rx_desc; ring->cached_phctime = pf->ptp.cached_phc_time; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 36e6a078c84c7..2533876f1a2fd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -37,6 +37,8 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; #define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg" MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_IMPORT_NS("LIBIE"); MODULE_IMPORT_NS("LIBIE_ADMINQ"); MODULE_IMPORT_NS("LIBIE_FWLOG"); @@ -3015,19 +3017,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } } xdp_features_set_redirect_target(vsi->netdev, true); - /* reallocate Rx queues that are used for zero-copy */ - xdp_ring_err = ice_realloc_zc_buf(vsi, true); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources 
failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_features_clear_redirect_target(vsi->netdev); xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); - /* reallocate Rx queues that were used for zero-copy */ - xdp_ring_err = ice_realloc_zc_buf(vsi, false); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } resume_if: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2b46e4c8be868..5a966138eacfc 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -111,7 +113,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, static void ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { - if (dma_unmap_len(tx_buf, len)) + if (tx_buf->type != ICE_TX_BUF_XDP_TX && dma_unmap_len(tx_buf, len)) dma_unmap_page(ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), @@ -125,7 +127,7 @@ ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) dev_kfree_skb_any(tx_buf->skb); break; case ICE_TX_BUF_XDP_TX: - page_frag_free(tx_buf->raw_buf); + libeth_xdp_return_va(tx_buf->raw_buf, false); break; case ICE_TX_BUF_XDP_XMIT: xdp_return_frame(tx_buf->xdpf); @@ -512,54 +514,44 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) */ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { - struct xdp_buff *xdp = &rx_ring->xdp; - struct device *dev = rx_ring->dev; + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; u32 size; - u16 i; - - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_buf) - return; if (rx_ring->xsk_pool) { ice_xsk_clean_rx_ring(rx_ring); goto rx_skip_free; } - if (xdp->data) { - xdp_return_buff(xdp); - xdp->data = NULL; - } + /* 
ring already cleared, nothing to do */ + if (!rx_ring->rx_fqes) + return; + + libeth_xdp_return_stash(&rx_ring->xdp); /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqes = &rx_ring->rx_fqes[i]; - if (!rx_buf->page) - continue; + libeth_rx_recycle_slow(rx_fqes->netmem); - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. - */ - dma_sync_single_range_for_cpu(dev, rx_buf->dma, - rx_buf->page_offset, - ICE_RXBUF_3072, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE, - DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); - __free_page(rx_buf->page); - - rx_buf->page = NULL; - rx_buf->page_offset = 0; + if (unlikely(++i == rx_ring->count)) + i = 0; } + if (rx_ring->vsi->type == ICE_VSI_PF && + xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + rx_skip_free: - if (rx_ring->xsk_pool) - memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf))); - else - memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf))); /* Zero out the descriptor ring */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), @@ -568,7 +560,6 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; - rx_ring->first_desc = 0; rx_ring->next_to_use = 0; } @@ -580,26 +571,20 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) */ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) { + struct device *dev = ice_pf_to_dev(rx_ring->vsi->back); u32 size; ice_clean_rx_ring(rx_ring); - if (rx_ring->vsi->type == ICE_VSI_PF) - if 
(xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; - } else { - kfree(rx_ring->rx_buf); - rx_ring->rx_buf = NULL; } if (rx_ring->desc) { size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); - dmam_free_coherent(rx_ring->dev, size, - rx_ring->desc, rx_ring->dma); + dmam_free_coherent(dev, size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } } @@ -612,19 +597,9 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) */ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) { - struct device *dev = rx_ring->dev; + struct device *dev = ice_pf_to_dev(rx_ring->vsi->back); u32 size; - if (!dev) - return -ENOMEM; - - /* warn if we are about to overwrite the pointer */ - WARN_ON(rx_ring->rx_buf); - rx_ring->rx_buf = - kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); - if (!rx_ring->rx_buf) - return -ENOMEM; - /* round up to nearest page */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); @@ -633,22 +608,16 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) if (!rx_ring->desc) { dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", size); - goto err; + return -ENOMEM; } rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; - rx_ring->first_desc = 0; if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); return 0; - -err: - kfree(rx_ring->rx_buf); - rx_ring->rx_buf = NULL; - return -ENOMEM; } /** @@ -662,7 +631,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static u32 -ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, +ice_run_xdp(struct ice_rx_ring *rx_ring, struct libeth_xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, union ice_32b_rx_flex_desc *eop_desc) { @@ -672,23 +641,23 @@ ice_run_xdp(struct 
ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (!xdp_prog) goto exit; - ice_xdp_meta_set_desc(xdp, eop_desc); + xdp->desc = eop_desc; - act = bpf_prog_run_xdp(xdp_prog, xdp); + act = bpf_prog_run_xdp(xdp_prog, &xdp->base); switch (act) { case XDP_PASS: break; case XDP_TX: if (static_branch_unlikely(&ice_xdp_locking_key)) spin_lock(&xdp_ring->tx_lock); - ret = __ice_xmit_xdp_ring(xdp, xdp_ring, false); + ret = __ice_xmit_xdp_ring(&xdp->base, xdp_ring, false); if (static_branch_unlikely(&ice_xdp_locking_key)) spin_unlock(&xdp_ring->tx_lock); if (ret == ICE_XDP_CONSUMED) goto out_failure; break; case XDP_REDIRECT: - if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog)) + if (xdp_do_redirect(rx_ring->netdev, &xdp->base, xdp_prog)) goto out_failure; ret = ICE_XDP_REDIR; break; @@ -700,8 +669,10 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, trace_xdp_exception(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_DROP: + libeth_xdp_return_buff(xdp); ret = ICE_XDP_CONSUMED; } + exit: return ret; } @@ -789,48 +760,6 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, return nxmit; } -/** - * ice_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buf struct to modify - * - * Returns true if the page was successfully allocated or - * reused. 
- */ -static bool -ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* alloc new page for storage */ - page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ice_rx_pg_order(rx_ring)); - rx_ring->ring_stats->rx_stats.alloc_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = rx_ring->rx_offset; - page_ref_add(page, USHRT_MAX - 1); - - return true; -} - /** * ice_init_ctrl_rx_descs - Initialize Rx descriptors for control vsi. * @rx_ring: ring to init descriptors on @@ -877,9 +806,14 @@ void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 count) */ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) { + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; - struct ice_rx_buf *bi; /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) @@ -887,30 +821,25 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) /* get the Rx descriptor and buffer based on next_to_use */ rx_desc = ICE_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_buf[ntu]; do { - /* if we fail here, we have work remaining */ - if (!ice_alloc_mapped_page(rx_ring, bi)) - break; + dma_addr_t addr; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - ICE_RXBUF_3072, - 
DMA_FROM_DEVICE); + addr = libeth_rx_alloc(&fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + rx_ring->ring_stats->rx_stats.alloc_page_failed++; + break; + } /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; ntu++; if (unlikely(ntu == rx_ring->count)) { rx_desc = ICE_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buf; ntu = 0; } @@ -926,205 +855,6 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) return !!cleaned_count; } -/** - * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag - * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: xdp buff to place the data into - * @rx_buf: buffer containing page to add - * @size: packet length from rx_desc - * - * This function will add the data contained in rx_buf->page to the xdp buf. - * It will just attach the page as a frag. - */ -static int -ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct ice_rx_buf *rx_buf, const unsigned int size) -{ - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - - if (!size) - return 0; - - if (!xdp_buff_has_frags(xdp)) { - sinfo->nr_frags = 0; - sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(xdp); - } - - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) - return -ENOMEM; - - __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, - rx_buf->page_offset, size); - sinfo->xdp_frags_size += size; - - if (page_is_pfmemalloc(rx_buf->page)) - xdp_buff_set_frag_pfmemalloc(xdp); - - return 0; -} - -/** - * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use - * @rx_ring: Rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * @ntc: index of next to clean element - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. 
- */ -static struct ice_rx_buf * -ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, - const unsigned int ntc) -{ - struct ice_rx_buf *rx_buf; - - rx_buf = &rx_ring->rx_buf[ntc]; - prefetchw(rx_buf->page); - - if (!size) - return rx_buf; - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, - rx_buf->page_offset, size, - DMA_FROM_DEVICE); - - return rx_buf; -} - -/** - * ice_get_pgcnts - grab page_count() for gathered fragments - * @rx_ring: Rx descriptor ring to store the page counts on - * @ntc: the next to clean element (not included in this frame!) - * - * This function is intended to be called right before running XDP - * program so that the page recycling mechanism will be able to take - * a correct decision regarding underlying pages; this is done in such - * way as XDP program can change the refcount of page - */ -static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc) -{ - u32 idx = rx_ring->first_desc; - struct ice_rx_buf *rx_buf; - u32 cnt = rx_ring->count; - - while (idx != ntc) { - rx_buf = &rx_ring->rx_buf[idx]; - rx_buf->pgcnt = page_count(rx_buf->page); - - if (++idx == cnt) - idx = 0; - } -} - -/** - * ice_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: xdp_buff pointing to the data - * - * This function builds an skb around an existing XDP buffer, taking care - * to set up the skb correctly and avoid any memcpy overhead. Driver has - * already combined frags (if any) to skb_shared_info. - */ -static struct sk_buff * -ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) -{ - u8 metasize = xdp->data - xdp->data_meta; - struct skb_shared_info *sinfo = NULL; - unsigned int nr_frags; - struct sk_buff *skb; - - if (unlikely(xdp_buff_has_frags(xdp))) { - sinfo = xdp_get_shared_info_from_buff(xdp); - nr_frags = sinfo->nr_frags; - } - - /* Prefetch first cache line of first page. 
If xdp->data_meta - * is unused, this points exactly as xdp->data, otherwise we - * likely have a consumer accessing first few bytes of meta - * data, and then actual data. - */ - net_prefetch(xdp->data_meta); - /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); - if (unlikely(!skb)) - return NULL; - - /* must to record Rx queue, otherwise OS features such as - * symmetric queue won't work - */ - skb_record_rx_queue(skb, rx_ring->q_index); - - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); - - if (unlikely(xdp_buff_has_frags(xdp))) - xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_get_skb_flags(xdp)); - - return skb; -} - -/** - * ice_put_rx_buf - Clean up used buffer and either recycle or free - * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buf: Rx buffer to pull data from - * - * This function will clean up the contents of the rx_buf. It will either - * recycle the buffer or unmap it and free the associated resources. - */ -static void -ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) -{ - if (!rx_buf) - return; - - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, - PAGE_SIZE, DMA_FROM_DEVICE, - ICE_RX_DMA_ATTR); - - /* clear contents of buffer_info */ - rx_buf->page = NULL; -} - -/** - * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame - * @rx_ring: Rx ring with all the auxiliary data - * @xdp: XDP buffer carrying linear + frags part - * @ntc: the next to clean element (not included in this frame!) - * @verdict: return code from XDP program execution - * - * Called after XDP program is completed, or on error with verdict set to - * ICE_XDP_CONSUMED. 
- * - * Walk through buffers from first_desc to the end of the frame, releasing - * buffers and satisfying internal page recycle mechanism. The action depends - * on verdict from XDP program. - */ -static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 ntc, u32 verdict) -{ - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - while (idx != ntc) { - buf = &rx_ring->rx_buf[idx]; - if (++idx == cnt) - idx = 0; - - ice_put_rx_buf(rx_ring, buf); - } - - xdp->data = NULL; - rx_ring->first_desc = ntc; -} - /** * ice_clean_ctrl_rx_irq - Clean descriptors from flow director Rx ring * @rx_ring: Rx descriptor ring for ctrl_vsi to transact packets on @@ -1159,9 +889,8 @@ void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring) total_rx_pkts++; } - rx_ring->first_desc = ntc; rx_ring->next_to_clean = ntc; - ice_init_ctrl_rx_descs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); + ice_init_ctrl_rx_descs(rx_ring, ICE_DESC_UNUSED(rx_ring)); } /** @@ -1179,16 +908,17 @@ void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring) static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; - unsigned int offset = rx_ring->rx_offset; - struct xdp_buff *xdp = &rx_ring->xdp; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; bool failure; + libeth_xdp_init_buff(xdp, &rx_ring->xdp, &rx_ring->xdp_rxq); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1198,7 +928,7 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_buf; struct sk_buff *skb; unsigned int 
size; u16 stat_err_bits; @@ -1227,65 +957,47 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) size = le16_to_cpu(rx_desc->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); + if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, + stat_err_bits))) + size = 0; + /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, size, ntc); + rx_buf = &rx_ring->rx_fqes[ntc]; + libeth_xdp_process_buff(xdp, rx_buf, size); - /* Increment ntc before calls to ice_put_rx_mbuf() */ if (++ntc == cnt) ntc = 0; - if (!xdp->data) { - void *hard_start; - - hard_start = page_address(rx_buf->page) + rx_buf->page_offset - - offset; - xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); - xdp_buff_clear_frags_flag(xdp); - } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED); - break; - } - /* skip if it is NOP desc */ - if (ice_is_non_eop(rx_ring, rx_desc)) + if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!xdp->data)) continue; - ice_get_pgcnts(rx_ring, ntc); xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; - total_rx_bytes += xdp_get_buff_len(xdp); - total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); - xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR); + if (xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) + xdp_xmit |= xdp_verdict; + total_rx_bytes += xdp_get_buff_len(&xdp->base); + total_rx_pkts++; + xdp->data = NULL; continue; + construct_skb: - skb = ice_build_skb(rx_ring, xdp); + skb = xdp_build_skb_from_buff(&xdp->base); + xdp->data = NULL; + /* exit if we failed to retrieve a buffer */ if (!skb) { + libeth_xdp_return_buff_slow(xdp); rx_ring->ring_stats->rx_stats.alloc_buf_failed++; - xdp_verdict = ICE_XDP_CONSUMED; - } - ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); - - if (!skb) - break; - - stat_err_bits = 
BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); - if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, - stat_err_bits))) { - dev_kfree_skb_any(skb); continue; } vlan_tci = ice_get_vlan_tci(rx_desc); - /* pad the skb if needed, to make a valid ethernet frame */ - if (eth_skb_pad(skb)) - continue; - /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1302,11 +1014,13 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ - failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); + failure = ice_alloc_rx_bufs(rx_ring, ICE_DESC_UNUSED(rx_ring)); if (xdp_xmit) ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu); + libeth_xdp_save_buff(&rx_ring->xdp, xdp); + if (rx_ring->ring_stats) ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 3c7830f787de8..e97a38ef3fe7e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -4,6 +4,8 @@ #ifndef _ICE_TXRX_H_ #define _ICE_TXRX_H_ +#include + #include "ice_type.h" #define ICE_DFLT_IRQ_WORK 256 @@ -27,8 +29,6 @@ #define ICE_MAX_TXQ_PER_TXQG 128 -#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) - /* We are assuming that the cache line is always 64 Bytes here for ice. * In order to make sure that is a correct assumption there is a check in probe * to print a warning if the read from GLPCI_CNF2 tells us that the cache line @@ -48,10 +48,6 @@ (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) -#define ICE_RX_DESC_UNUSED(R) \ - ((((R)->first_desc > (R)->next_to_use) ? 
0 : (R)->count) + \ - (R)->first_desc - (R)->next_to_use - 1) - #define ICE_RING_QUARTER(R) ((R)->count >> 2) #define ICE_TX_FLAGS_TSO BIT(0) @@ -133,13 +129,6 @@ struct ice_tx_offload_params { u8 header_len; }; -struct ice_rx_buf { - dma_addr_t dma; - struct page *page; - unsigned int page_offset; - unsigned int pgcnt; -}; - struct ice_q_stats { u64 pkts; u64 bytes; @@ -197,15 +186,6 @@ struct ice_pkt_ctx { __be16 vlan_proto; }; -struct ice_xdp_buff { - struct xdp_buff xdp_buff; - const union ice_32b_rx_flex_desc *eop_desc; - const struct ice_pkt_ctx *pkt_ctx; -}; - -/* Required for compatibility with xdp_buffs from xsk_pool */ -static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0); - /* indices into GLINT_ITR registers */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 @@ -258,7 +238,7 @@ struct ice_tstamp_ring { struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ + struct page_pool *pp; struct net_device *netdev; /* netdev ring maps to */ struct ice_vsi *vsi; /* Backreference to associated VSI */ struct ice_q_vector *q_vector; /* Backreference to associated vector */ @@ -270,14 +250,16 @@ struct ice_rx_ring { u16 next_to_alloc; union { - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_fqes; struct xdp_buff **xdp_buf; }; + /* CL2 - 2nd cacheline starts here */ union { - struct ice_xdp_buff xdp_ext; - struct xdp_buff xdp; + struct libeth_xdp_buff_stash xdp; + struct libeth_xdp_buff *xsk; }; + /* CL3 - 3rd cacheline starts here */ union { struct ice_pkt_ctx pkt_ctx; @@ -287,12 +269,11 @@ struct ice_rx_ring { }; }; struct bpf_prog *xdp_prog; - u16 rx_offset; /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u16 first_desc; + u32 truesize; /* stats structs */ struct ice_ring_stats *ring_stats; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 
45cfaabc41cbe..956da38d63b00 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -3,6 +3,7 @@ #include #include +#include #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -230,9 +231,12 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, if (ice_is_port_repr_netdev(netdev)) ice_repr_inc_rx_stats(netdev, skb->len); + + /* __skb_push() is needed because xdp_build_skb_from_buff() + * calls eth_type_trans() + */ + __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, netdev); - } else { - skb->protocol = eth_type_trans(skb, rx_ring->netdev); } ice_rx_csum(rx_ring, skb, rx_desc, ptype); @@ -270,19 +274,18 @@ static void ice_clean_xdp_tx_buf(struct device *dev, struct ice_tx_buf *tx_buf, struct xdp_frame_bulk *bq) { - dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - switch (tx_buf->type) { case ICE_TX_BUF_XDP_TX: - page_frag_free(tx_buf->raw_buf); + libeth_xdp_return_va(tx_buf->raw_buf, true); break; case ICE_TX_BUF_XDP_XMIT: + dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); xdp_return_frame_bulk(tx_buf->xdpf, bq); break; } + dma_unmap_len_set(tx_buf, len, 0); tx_buf->type = ICE_TX_BUF_EMPTY; } @@ -377,9 +380,11 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf; u32 cnt = xdp_ring->count; void *data = xdp->data; + struct page *page; u32 nr_frags = 0; u32 free_space; u32 frag = 0; + u32 offset; free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) @@ -399,24 +404,28 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, tx_head = &xdp_ring->tx_buf[ntu]; tx_buf = tx_head; + page = virt_to_page(data); + offset = offset_in_page(xdp->data); + for (;;) { dma_addr_t dma; - dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, 
dma)) - goto dma_unmap; - - /* record length, and DMA address */ - dma_unmap_len_set(tx_buf, len, size); - dma_unmap_addr_set(tx_buf, dma, dma); - if (frame) { + dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma)) + goto dma_unmap; tx_buf->type = ICE_TX_BUF_FRAG; } else { + dma = page_pool_get_dma_addr(page) + offset; + dma_sync_single_for_device(dev, dma, size, DMA_BIDIRECTIONAL); tx_buf->type = ICE_TX_BUF_XDP_TX; tx_buf->raw_buf = data; } + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + tx_desc->buf_addr = cpu_to_le64(dma); tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0); @@ -430,6 +439,8 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, tx_desc = ICE_TX_DESC(xdp_ring, ntu); tx_buf = &xdp_ring->tx_buf[ntu]; + page = skb_frag_page(&sinfo->frags[frag]); + offset = skb_frag_off(&sinfo->frags[frag]); data = skb_frag_address(&sinfo->frags[frag]); size = skb_frag_size(&sinfo->frags[frag]); frag++; @@ -514,10 +525,13 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, */ static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; + struct ice_rx_ring *rx_ring; - *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->eop_desc, - xdp_ext->pkt_ctx); + rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq); + + *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->desc, + &rx_ring->pkt_ctx); if (!*ts_ns) return -ENODATA; @@ -545,10 +559,10 @@ ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc) static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; - *hash = ice_get_rx_hash(xdp_ext->eop_desc); - *rss_type = 
ice_xdp_rx_hash_type(xdp_ext->eop_desc); + *hash = ice_get_rx_hash(xdp_ext->desc); + *rss_type = ice_xdp_rx_hash_type(xdp_ext->desc); if (!likely(*hash)) return -ENODATA; @@ -567,13 +581,16 @@ static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto, u16 *vlan_tci) { - const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const struct libeth_xdp_buff *xdp_ext = (void *)ctx; + struct ice_rx_ring *rx_ring; + + rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq); - *vlan_proto = xdp_ext->pkt_ctx->vlan_proto; + *vlan_proto = rx_ring->pkt_ctx.vlan_proto; if (!*vlan_proto) return -ENODATA; - *vlan_tci = ice_get_vlan_tci(xdp_ext->eop_desc); + *vlan_tci = ice_get_vlan_tci(xdp_ext->desc); if (!*vlan_tci) return -ENODATA; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 99717730f21a7..6a3f10f7a53f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -135,13 +135,4 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, void ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci); -static inline void -ice_xdp_meta_set_desc(struct xdp_buff *xdp, - union ice_32b_rx_flex_desc *eop_desc) -{ - struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff, - xdp_buff); - - xdp_ext->eop_desc = eop_desc; -} #endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index b25bc5ba40abf..989ff1fd91103 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include "ice.h" @@ -169,50 +170,18 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) * If allocation was successful, substitute buffer with allocated one. 
* Returns 0 on success, negative on failure */ -static int +int ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present) { - size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) : - sizeof(*rx_ring->rx_buf); - void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL); - - if (!sw_ring) - return -ENOMEM; - if (pool_present) { - kfree(rx_ring->rx_buf); - rx_ring->rx_buf = NULL; - rx_ring->xdp_buf = sw_ring; + rx_ring->xdp_buf = kcalloc(rx_ring->count, + sizeof(*rx_ring->xdp_buf), + GFP_KERNEL); + if (!rx_ring->xdp_buf) + return -ENOMEM; } else { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; - rx_ring->rx_buf = sw_ring; - } - - return 0; -} - -/** - * ice_realloc_zc_buf - reallocate XDP ZC queue pairs - * @vsi: Current VSI - * @zc: is zero copy set - * - * Reallocate buffer for rx_rings that might be used by XSK. - * XDP requires more memory, than rx_buf provides. - * Returns 0 on success, negative on failure - */ -int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) -{ - struct ice_rx_ring *rx_ring; - uint i; - - ice_for_each_rxq(vsi, i) { - rx_ring = vsi->rx_rings[i]; - if (!rx_ring->xsk_pool) - continue; - - if (ice_realloc_rx_xdp_bufs(rx_ring, zc)) - return -ENOMEM; } return 0; @@ -228,6 +197,7 @@ int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) */ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) { + struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; @@ -241,8 +211,6 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) ice_is_xdp_ena_vsi(vsi); if (if_running) { - struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; - ret = ice_qp_dis(vsi, qid); if (ret) { netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret); @@ -303,11 +271,6 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->wb.status_error0 = 0; - /* Put private 
info that changes on a per-packet basis - * into xdp_buff_xsk->cb. - */ - ice_xdp_meta_set_desc(*xdp, rx_desc); - rx_desc++; xdp++; } @@ -606,10 +569,10 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, struct xsk_buff_pool *xsk_pool, int budget) { + struct xdp_buff *first = (struct xdp_buff *)rx_ring->xsk; unsigned int total_rx_bytes = 0, total_rx_packets = 0; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; - struct xdp_buff *first = NULL; struct ice_tx_ring *xdp_ring; unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; @@ -623,9 +586,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, xdp_prog = READ_ONCE(rx_ring->xdp_prog); xdp_ring = rx_ring->xdp_ring; - if (ntc != rx_ring->first_desc) - first = *ice_xdp_buf(rx_ring, rx_ring->first_desc); - while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; @@ -661,15 +621,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, first = xdp; } else if (likely(size) && !xsk_buff_add_frag(first, xdp)) { xsk_buff_free(first); - break; + first = NULL; } if (++ntc == cnt) ntc = 0; - if (ice_is_non_eop(rx_ring, rx_desc)) + if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!first)) continue; + ((struct libeth_xdp_buff *)first)->desc = rx_desc; + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { @@ -677,7 +639,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, } else if (xdp_res == ICE_XDP_EXIT) { failure = true; first = NULL; - rx_ring->first_desc = ntc; break; } else if (xdp_res == ICE_XDP_CONSUMED) { xsk_buff_free(first); @@ -689,7 +650,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, total_rx_packets++; first = NULL; - rx_ring->first_desc = ntc; continue; construct_skb: @@ -697,12 +657,14 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, skb = xdp_build_skb_from_zc(first); if (!skb) { xsk_buff_free(first); + first = NULL; + 
rx_ring->ring_stats->rx_stats.alloc_buf_failed++; - break; + continue; } first = NULL; - rx_ring->first_desc = ntc; + total_rx_bytes += skb->len; total_rx_packets++; @@ -713,7 +675,9 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, } rx_ring->next_to_clean = ntc; - entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); + rx_ring->xsk = (struct libeth_xdp_buff *)first; + + entries_to_alloc = ICE_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, entries_to_alloc); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 600cbeeaa2030..5275fcedc9e1d 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -22,7 +22,7 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); -int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); +int ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present); void ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid); void ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, @@ -77,8 +77,8 @@ static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { } static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { } static inline int -ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi, - bool __always_unused zc) +ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, + bool __always_unused pool_present) { return 0; } From 8adfcfd6a2eedbe4007ad6732bed829f41ec720f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 6 Oct 2025 18:20:53 +0200 Subject: [PATCH 357/867] ice: implement configurable header split for regular Rx Add second page_pool for header buffers to each Rx queue and ability to 
toggle the header split on/off using Ethtool (default to off to match the current behaviour). Unlike idpf, all HW backed up by ice doesn't require any W/As and correctly splits all types of packets as configured: after L4 headers for TCP/UDP/SCTP, after L3 headers for other IPv4/IPv6 frames, after the Ethernet header otherwise (in case of tunneling, same as above, but after innermost headers). This doesn't affect the XSk path as there are no benefits of having it there. Signed-off-by: Alexander Lobakin Tested-by: Alexander Nowlin Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_base.c | 89 +++++++++++++++---- drivers/net/ethernet/intel/ice/ice_ethtool.c | 15 +++- .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 3 + drivers/net/ethernet/intel/ice/ice_txrx.c | 89 +++++++++++++++---- drivers/net/ethernet/intel/ice/ice_txrx.h | 7 ++ 6 files changed, 168 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 3d4d8b88631b2..147aaee192a79 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -351,6 +351,7 @@ struct ice_vsi { u16 num_q_vectors; /* tell if only dynamic irq allocation is allowed */ bool irq_dyn_alloc; + bool hsplit:1; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index eabab50fab33d..eadb1e3d12b3a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -524,8 +524,29 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) else rlan_ctx.l2tsel = 1; - rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; - rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + if (ring->hdr_pp) { + rlan_ctx.hbuf = ring->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S; + rlan_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT; + + /* 
+ * If the frame is TCP/UDP/SCTP, it will be split by the + * payload. + * If not, but it's an IPv4/IPv6 frame, it will be split by + * the IP header. + * If not IP, it will be split by the Ethernet header. + * + * In any case, the header buffer will never be left empty. + */ + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 | + ICE_RLAN_RX_HSPLIT_0_SPLIT_IP | + ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP | + ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP; + } else { + rlan_ctx.hbuf = 0; + rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + } + rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; /* This controls whether VLAN is stripped from inner headers @@ -581,6 +602,53 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) return 0; } +static int ice_rxq_pp_create(struct ice_rx_ring *rq) +{ + struct libeth_fq fq = { + .count = rq->count, + .nid = NUMA_NO_NODE, + .hsplit = rq->vsi->hsplit, + .xdp = ice_is_xdp_ena_vsi(rq->vsi), + .buf_len = LIBIE_MAX_RX_BUF_LEN, + }; + int err; + + err = libeth_rx_fq_create(&fq, &rq->q_vector->napi); + if (err) + return err; + + rq->pp = fq.pp; + rq->rx_fqes = fq.fqes; + rq->truesize = fq.truesize; + rq->rx_buf_len = fq.buf_len; + + if (!fq.hsplit) + return 0; + + fq = (struct libeth_fq){ + .count = rq->count, + .type = LIBETH_FQE_HDR, + .nid = NUMA_NO_NODE, + .xdp = ice_is_xdp_ena_vsi(rq->vsi), + }; + + err = libeth_rx_fq_create(&fq, &rq->q_vector->napi); + if (err) + goto destroy; + + rq->hdr_pp = fq.pp; + rq->hdr_fqes = fq.fqes; + rq->hdr_truesize = fq.truesize; + rq->rx_hdr_len = fq.buf_len; + + return 0; + +destroy: + ice_rxq_pp_destroy(rq); + + return err; +} + /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -589,12 +657,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) */ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { - struct libeth_fq fq = { - .count = ring->count, - .nid = NUMA_NO_NODE, - .xdp = ice_is_xdp_ena_vsi(ring->vsi), - .buf_len = 
LIBIE_MAX_RX_BUF_LEN, - }; struct device *dev = ice_pf_to_dev(ring->vsi->back); u32 num_bufs = ICE_DESC_UNUSED(ring); u32 rx_buf_len; @@ -636,15 +698,10 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { - err = libeth_rx_fq_create(&fq, &ring->q_vector->napi); + err = ice_rxq_pp_create(ring); if (err) return err; - ring->pp = fq.pp; - ring->rx_fqes = fq.fqes; - ring->truesize = fq.truesize; - ring->rx_buf_len = fq.buf_len; - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, @@ -699,9 +756,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return 0; err_destroy_fq: - libeth_rx_fq_destroy(&fq); - ring->rx_fqes = NULL; - ring->pp = NULL; + ice_rxq_pp_destroy(ring); return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 36fdac4fddc3c..a1d9abee97e5f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3151,6 +3151,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_jumbo_max_pending = 0; ring->rx_mini_pending = 0; ring->rx_jumbo_pending = 0; + + kernel_ring->tcp_data_split = vsi->hsplit ? 
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static int @@ -3167,6 +3171,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, int i, timeout = 50, err = 0; struct ice_hw *hw = &pf->hw; u16 new_rx_cnt, new_tx_cnt; + bool hsplit; if (ring->tx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) || ring->tx_pending < ICE_MIN_NUM_DESC || @@ -3192,9 +3197,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", new_rx_cnt); + hsplit = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED; + /* if nothing to do return success */ if (new_tx_cnt == vsi->tx_rings[0]->count && - new_rx_cnt == vsi->rx_rings[0]->count) { + new_rx_cnt == vsi->rx_rings[0]->count && + hsplit == vsi->hsplit) { netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; } @@ -3224,6 +3232,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, vsi->xdp_rings[i]->count = new_tx_cnt; vsi->num_tx_desc = (u16)new_tx_cnt; vsi->num_rx_desc = (u16)new_rx_cnt; + vsi->hsplit = hsplit; + netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@ -3330,6 +3340,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, } process_link: + vsi->hsplit = hsplit; + /* Bring interface down, copy in the new ring info, then restore the * interface. 
if VSI is up, bring it down and then back up */ @@ -4811,6 +4823,7 @@ static const struct ethtool_ops ice_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_RX_USECS_HIGH, .supported_input_xfrm = RXH_XFRM_SYM_XOR, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, .get_fec_stats = ice_get_fec_stats, diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 10c312d49e052..185672c7e17d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -342,6 +342,9 @@ enum ice_flg64_bits { /* for ice_32byte_rx_flex_desc.pkt_length member */ #define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ +/* ice_32byte_rx_flex_desc::hdr_len_sph_flex_flags1 */ +#define ICE_RX_FLEX_DESC_HDR_LEN_M GENMASK(10, 0) + enum ice_rx_flex_desc_status_error_0_bits { /* Note: These are predefined bit offsets */ ICE_RX_FLEX_DESC_STATUS0_DD_S = 0, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5a966138eacfc..ad76768a42323 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -508,16 +508,34 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) return -ENOMEM; } +void ice_rxq_pp_destroy(struct ice_rx_ring *rq) +{ + struct libeth_fq fq = { + .fqes = rq->rx_fqes, + .pp = rq->pp, + }; + + libeth_rx_fq_destroy(&fq); + rq->rx_fqes = NULL; + rq->pp = NULL; + + if (!rq->hdr_pp) + return; + + fq.fqes = rq->hdr_fqes; + fq.pp = rq->hdr_pp; + + libeth_rx_fq_destroy(&fq); + rq->hdr_fqes = NULL; + rq->hdr_pp = NULL; +} + /** * ice_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned */ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { - struct libeth_fq fq = { - .fqes = rx_ring->rx_fqes, - .pp = rx_ring->pp, - }; u32 size; if (rx_ring->xsk_pool) { @@ -533,9 
+551,10 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) /* Free all the Rx ring sk_buffs */ for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { - const struct libeth_fqe *rx_fqes = &rx_ring->rx_fqes[i]; + libeth_rx_recycle_slow(rx_ring->rx_fqes[i].netmem); - libeth_rx_recycle_slow(rx_fqes->netmem); + if (rx_ring->hdr_pp) + libeth_rx_recycle_slow(rx_ring->hdr_fqes[i].netmem); if (unlikely(++i == rx_ring->count)) i = 0; @@ -547,12 +566,9 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); } - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; + ice_rxq_pp_destroy(rx_ring); rx_skip_free: - /* Zero out the descriptor ring */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), PAGE_SIZE); @@ -806,6 +822,12 @@ void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 count) */ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) { + const struct libeth_fq_fp hdr_fq = { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + .truesize = rx_ring->hdr_truesize, + .count = rx_ring->count, + }; const struct libeth_fq_fp fq = { .pp = rx_ring->pp, .fqes = rx_ring->rx_fqes, @@ -836,6 +858,20 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) */ rx_desc->read.pkt_addr = cpu_to_le64(addr); + if (!hdr_fq.pp) + goto next; + + addr = libeth_rx_alloc(&hdr_fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + rx_ring->ring_stats->rx_stats.alloc_page_failed++; + + libeth_rx_recycle_slow(fq.fqes[ntu].netmem); + break; + } + + rx_desc->read.hdr_addr = cpu_to_le64(addr); + +next: rx_desc++; ntu++; if (unlikely(ntu == rx_ring->count)) { @@ -933,14 +969,16 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int size; u16 stat_err_bits; u16 vlan_tci; + bool rxe; /* get the Rx desc from Rx ring based on 'next_to_clean' */ rx_desc = ICE_RX_DESC(rx_ring, ntc); - /* status_error_len will always be zero for unused 
descriptors - * because it's cleared in cleanup, and overlaps with hdr_addr - * which is always zero because packet split isn't used, if the - * hardware wrote DD then it will be non-zero + /* + * The DD bit will always be zero for unused descriptors + * because it's cleared in cleanup or when setting the DMA + * address of the header buffer, which never uses the DD bit. + * If the hardware wrote the descriptor, it will be non-zero. */ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits)) @@ -954,12 +992,27 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ice_trace(clean_rx_irq, rx_ring, rx_desc); + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_HBO_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); + rxe = ice_test_staterr(rx_desc->wb.status_error0, + stat_err_bits); + + if (!rx_ring->hdr_pp) + goto payload; + + size = le16_get_bits(rx_desc->wb.hdr_len_sph_flex_flags1, + ICE_RX_FLEX_DESC_HDR_LEN_M); + if (unlikely(rxe)) + size = 0; + + rx_buf = &rx_ring->hdr_fqes[ntc]; + libeth_xdp_process_buff(xdp, rx_buf, size); + rx_buf->netmem = 0; + +payload: size = le16_to_cpu(rx_desc->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; - - stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); - if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, - stat_err_bits))) + if (unlikely(rxe)) size = 0; /* retrieve a buffer from the ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index e97a38ef3fe7e..e440c55d9e9f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -255,6 +255,9 @@ struct ice_rx_ring { }; /* CL2 - 2nd cacheline starts here */ + struct libeth_fqe *hdr_fqes; + struct page_pool *hdr_pp; + union { struct libeth_xdp_buff_stash xdp; struct libeth_xdp_buff *xsk; @@ -273,6 +276,8 @@ struct ice_rx_ring { /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; + + u32 hdr_truesize; 
u32 truesize; /* stats structs */ @@ -284,6 +289,7 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; + u16 rx_hdr_len; u16 rx_buf_len; dma_addr_t dma; /* physical address of ring */ u8 dcb_tc; /* Traffic class of ring */ @@ -396,6 +402,7 @@ static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring) union ice_32b_rx_flex_desc; void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 num_descs); +void ice_rxq_pp_destroy(struct ice_rx_ring *rq); bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); u16 From ba2807b869a16d36b8bc76efa774fe433b3c45de Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 24 Feb 2025 21:59:24 +0100 Subject: [PATCH 358/867] ice: Allow 100M speed for E825C SGMII device Add E825C 10GbE SGMII device to the list of devices supporting 100Mbit link mode. Without that change, 100Mbit link mode is ignored in ethtool interface. This change was missed while adding the support for E825C devices family. Testing hints (please note, for previous version, 100baseT/Full entry was missing): [root@localhost]# ethtool eth3 Settings for eth3: Supported ports: [ TP ] Supported link modes: 100baseT/Full 1000baseT/Full 10000baseT/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None Advertised link modes: 100baseT/Full 1000baseT/Full 10000baseT/Full ... 
Signed-off-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index b097cc8b175cb..83f5217bce9f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3392,6 +3392,7 @@ bool ice_is_100m_speed_supported(struct ice_hw *hw) case ICE_DEV_ID_E822L_SGMII: case ICE_DEV_ID_E823L_1GBE: case ICE_DEV_ID_E823C_SGMII: + case ICE_DEV_ID_E825C_SGMII: return true; default: return false; From a7ae783da0b919550e260aebfca1c6ef030b99a4 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Wed, 22 Oct 2025 18:11:43 +0200 Subject: [PATCH 359/867] i40e: avoid redundant VF link state updates Multiple sources can request VF link state changes with identical parameters. For example, OpenStack Neutron may request to set the VF link state to IFLA_VF_LINK_STATE_AUTO during every initialization or a user can issue: `ip link set vf 0 state auto` multiple times. Currently, the i40e driver processes each of these requests, even if the requested state is the same as the current one. This leads to unnecessary VF resets and can cause performance degradation or instability in the VF driver, particularly in environments using the Data Plane Development Kit (DPDK). With this patch i40e will skip VF link state change requests when the desired link state matches the current configuration. This prevents unnecessary VF resets and reduces PF-VF communication overhead. To reproduce the problem, run the following command multiple times on the same interface: 'ip link set vf 0 state auto' Every time the command is executed, the PF driver will trigger a VF reset. 
Co-developed-by: Robert Malz Signed-off-by: Robert Malz Signed-off-by: Jay Vosburgh Reviewed-by: Aleksandr Loktionov Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 081a4526a2f00..0fe0d52c796b3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4788,6 +4788,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) unsigned long q_map; struct i40e_vf *vf; int abs_vf_id; + int old_link; int ret = 0; int tmp; @@ -4806,6 +4807,17 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) vf = &pf->vf[vf_id]; abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; + /* skip VF link state change if requested state is already set */ + if (!vf->link_forced) + old_link = IFLA_VF_LINK_STATE_AUTO; + else if (vf->link_up) + old_link = IFLA_VF_LINK_STATE_ENABLE; + else + old_link = IFLA_VF_LINK_STATE_DISABLE; + + if (link == old_link) + goto error_out; + pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; From 5d9b400e6f7e2bfb2bd80709e03a6db42e34076b Mon Sep 17 00:00:00 2001 From: Sreedevi Joshi Date: Fri, 19 Sep 2025 11:59:05 -0500 Subject: [PATCH 360/867] idpf: remove duplicate defines in IDPF_CAP_RSS Remove duplicate defines from the OR operation when defining IDPF_CAP_RSS. Duplicate definitions were introduced when IDPF_CAP_RSS was originally defined and were left behind and went unnoticed during a previous commit that renamed them. Review of the original out-of-tree code confirms these duplicates were the result of a typing error. Remove the duplicates to clean up the code and avoid potential confusion. Also verify no other duplicate occurrences of these defines exist elsewhere in the codebase. 
Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Signed-off-by: Sreedevi Joshi Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index ca4da0c899794..50fa7be0c00d4 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -734,13 +734,11 @@ static inline bool idpf_is_rdma_cap_ena(struct idpf_adapter *adapter) } #define IDPF_CAP_RSS (\ - VIRTCHNL2_FLOW_IPV4_TCP |\ VIRTCHNL2_FLOW_IPV4_TCP |\ VIRTCHNL2_FLOW_IPV4_UDP |\ VIRTCHNL2_FLOW_IPV4_SCTP |\ VIRTCHNL2_FLOW_IPV4_OTHER |\ VIRTCHNL2_FLOW_IPV6_TCP |\ - VIRTCHNL2_FLOW_IPV6_TCP |\ VIRTCHNL2_FLOW_IPV6_UDP |\ VIRTCHNL2_FLOW_IPV6_SCTP |\ VIRTCHNL2_FLOW_IPV6_OTHER) From 6ef670d833a8f3b142cb80739135c56ade8a25b0 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 21 Oct 2025 12:12:14 -0700 Subject: [PATCH 361/867] ixgbe: fix typos in ixgbe driver comments Corrected function reference: - "proc_autoc_read_82599" -> "prot_autoc_read_82599" Fixed spelling of: - "big-enian" -> "big-endian" - "Virtualiztion" -> "Virtualization" Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index d5b1b974b4a33..3069b583fd81a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -198,7 +198,7 @@ static int prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked, * @hw: pointer to hardware structure * @autoc: value to write to AUTOC * @locked: bool to indicate whether the SW/FW lock was already taken by - * previous proc_autoc_read_82599. 
+ * previous prot_autoc_read_82599. * * This part (82599) may need to hold a the SW/FW lock around all writes to * AUTOC. Likewise after a write we need to do a pipeline reset. @@ -1622,7 +1622,7 @@ int ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; } - /* store source and destination IP masks (big-enian) */ + /* store source and destination IP masks (big-endian) */ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, ~input_mask->formatted.src_ip[0]); IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 170a29d162c65..a1d04914fbbca 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -318,7 +318,7 @@ static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter) * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB * @adapter: board private structure to initialize * - * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues + * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues * and VM pools where appropriate. Also assign queues based on DCB * priorities and map accordingly.. * @@ -492,7 +492,7 @@ static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices * @adapter: board private structure to initialize * - * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues + * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues * and VM pools where appropriate. If RSS is available, then also try and * enable RSS and map accordingly. 
* From 9157b8a88c0bd769808caaecce4b8c96bd826304 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 21 Oct 2025 12:32:01 -0700 Subject: [PATCH 362/867] igbvf: fix misplaced newline in VLAN add warning message Corrected the dev_warn format string: - "Vlan id %d\n is not added" -> "Vlan id %d is not added\n" Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 61dfcd8cb370a..ac57212ab02bd 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1235,7 +1235,7 @@ static int igbvf_vlan_rx_add_vid(struct net_device *netdev, spin_lock_bh(&hw->mbx_lock); if (hw->mac.ops.set_vfta(hw, vid, true)) { - dev_warn(&adapter->pdev->dev, "Vlan id %d\n is not added", vid); + dev_warn(&adapter->pdev->dev, "Vlan id %d is not added\n", vid); spin_unlock_bh(&hw->mbx_lock); return -EINVAL; } From f58abec23da51f03ffe25f9464f2ce2d1a2a592a Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Sat, 25 Oct 2025 17:26:37 +0800 Subject: [PATCH 363/867] net: ipv4: Remove extern udp_v4_early_demux()/tcp_v4_early_demux() in .c files Function udp_v4_early_demux() was already declared in 'include/net/udp.h', no need to keep the extern in 'ip_input.c', which may produce the following checkpatch warning: WARNING: externs should be avoided in .c files #45: FILE: net/ipv4/ip_input.c:322: +enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb); Replace it by including 'net/udp.h'. Do the same for tcp_v4_early_demux(). 
Signed-off-by: Wang Liang Link: https://patch.msgid.link/20251025092637.1020960-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 273578579a6b0..19d3141dad1f8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -141,6 +141,8 @@ #include #include #include +#include +#include /* * Process Router Attention IP option (RFC 2113) @@ -317,8 +319,6 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } -int tcp_v4_early_demux(struct sk_buff *skb); -enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb); static int ip_rcv_finish_core(struct net *net, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) From fc18b6e98cce2380e8f31b0e5089b11ceecb541d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:10 +0000 Subject: [PATCH 364/867] net: stmmac: move version handling into own function Move the version handling out of stmmac_hwif_init() and into its own function, returning the version information through a structure. 
Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDteg-0000000CCBr-2m7q@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 42 +++++++++++++++------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 41a7e18412276..4924e74997e4a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -13,6 +13,11 @@ #include "dwmac4_descs.h" #include "dwxgmac2.h" +struct stmmac_version { + u8 snpsver; + u8 dev_id; +}; + static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) { u32 reg = readl(priv->ioaddr + id_reg); @@ -40,6 +45,24 @@ static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 id_reg) return (reg & GENMASK(15, 8)) >> 8; } +static void stmmac_get_version(struct stmmac_priv *priv, + struct stmmac_version *ver) +{ + enum dwmac_core_type core_type = priv->plat->core_type; + + ver->dev_id = 0; + + if (core_type == DWMAC_CORE_GMAC) { + ver->snpsver = stmmac_get_id(priv, GMAC_VERSION); + } else if (dwmac_is_xmac(core_type)) { + ver->snpsver = stmmac_get_id(priv, GMAC4_VERSION); + if (core_type == DWMAC_CORE_XGMAC) + ver->dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION); + } else { + ver->snpsver = 0; + } +} + static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; @@ -292,23 +315,15 @@ int stmmac_hwif_init(struct stmmac_priv *priv) { enum dwmac_core_type core_type = priv->plat->core_type; const struct stmmac_hwif_entry *entry; + struct stmmac_version version; struct mac_device_info *mac; bool needs_setup = true; - u32 id, dev_id = 0; int i, ret; - if (core_type == DWMAC_CORE_GMAC) { - id = stmmac_get_id(priv, GMAC_VERSION); - } else if (dwmac_is_xmac(core_type)) { - id = stmmac_get_id(priv, GMAC4_VERSION); - if 
(core_type == DWMAC_CORE_XGMAC) - dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION); - } else { - id = 0; - } + stmmac_get_version(priv, &version); /* Save ID for later use */ - priv->synopsys_id = id; + priv->synopsys_id = version.snpsver; /* Lets assume some safe values first */ if (core_type == DWMAC_CORE_GMAC4) { @@ -344,7 +359,8 @@ int stmmac_hwif_init(struct stmmac_priv *priv) /* Use synopsys_id var because some setups can override this */ if (priv->synopsys_id < entry->min_id) continue; - if (core_type == DWMAC_CORE_XGMAC && (dev_id ^ entry->dev_id)) + if (core_type == DWMAC_CORE_XGMAC && + (version.dev_id ^ entry->dev_id)) continue; /* Only use generic HW helpers if needed */ @@ -380,7 +396,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv) } dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", - id, core_type == DWMAC_CORE_GMAC, + version.snpsver, core_type == DWMAC_CORE_GMAC, core_type == DWMAC_CORE_GMAC4); return -EINVAL; } From f49838f77cf6c50961afa33a5bc1ea95e061d7f0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:15 +0000 Subject: [PATCH 365/867] net: stmmac: simplify stmmac_get_version() We can simplify stmmac_get_version() by pre-initialising the version members to zero, detecting the MAC100 core and returning, otherwise determining the version register offset separately from calling stmmac_get_id() and stmmac_get_dev_id(). Do this. 
Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDtel-0000000CCBx-3Lpf@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 4924e74997e4a..f6ada5a905fe0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -49,18 +49,22 @@ static void stmmac_get_version(struct stmmac_priv *priv, struct stmmac_version *ver) { enum dwmac_core_type core_type = priv->plat->core_type; + unsigned int version_offset; + ver->snpsver = 0; ver->dev_id = 0; - if (core_type == DWMAC_CORE_GMAC) { - ver->snpsver = stmmac_get_id(priv, GMAC_VERSION); - } else if (dwmac_is_xmac(core_type)) { - ver->snpsver = stmmac_get_id(priv, GMAC4_VERSION); - if (core_type == DWMAC_CORE_XGMAC) - ver->dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION); - } else { - ver->snpsver = 0; - } + if (core_type == DWMAC_CORE_MAC100) + return; + + if (core_type == DWMAC_CORE_GMAC) + version_offset = GMAC_VERSION; + else + version_offset = GMAC4_VERSION; + + ver->snpsver = stmmac_get_id(priv, version_offset); + if (core_type == DWMAC_CORE_XGMAC) + ver->dev_id = stmmac_get_dev_id(priv, version_offset); } static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) From c36b97e4ca773f20ff20db34ed97e35cc36ad97c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:20 +0000 Subject: [PATCH 366/867] net: stmmac: consolidate version reading and validation There is no need to read the version register twice, once in stmmac_get_id() and then again in stmmac_get_dev_id(). Consolidate this into stmmac_get_version() and pass each of these this value. 
As both functions unnecessarily issue the same warning for a zero register value, also move this into stmmac_get_version(). Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDteq-0000000CCC3-3zbJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 29 ++++++++-------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index f6ada5a905fe0..ffdc101ce3ce4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -18,30 +18,16 @@ struct stmmac_version { u8 dev_id; }; -static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) +static u32 stmmac_get_id(struct stmmac_priv *priv, u32 reg) { - u32 reg = readl(priv->ioaddr + id_reg); - - if (!reg) { - dev_info(priv->device, "Version ID not available\n"); - return 0x0; - } - dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", (unsigned int)(reg & GENMASK(15, 8)) >> 8, (unsigned int)(reg & GENMASK(7, 0))); return reg & GENMASK(7, 0); } -static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 id_reg) +static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 reg) { - u32 reg = readl(priv->ioaddr + id_reg); - - if (!reg) { - dev_info(priv->device, "Version ID not available\n"); - return 0x0; - } - return (reg & GENMASK(15, 8)) >> 8; } @@ -50,6 +36,7 @@ static void stmmac_get_version(struct stmmac_priv *priv, { enum dwmac_core_type core_type = priv->plat->core_type; unsigned int version_offset; + u32 version; ver->snpsver = 0; ver->dev_id = 0; @@ -62,9 +49,15 @@ static void stmmac_get_version(struct stmmac_priv *priv, else version_offset = GMAC4_VERSION; - ver->snpsver = stmmac_get_id(priv, version_offset); + version = readl(priv->ioaddr + version_offset); + if (version == 0) { + dev_info(priv->device, 
"Version ID not available\n"); + return; + } + + ver->snpsver = stmmac_get_id(priv, version); if (core_type == DWMAC_CORE_XGMAC) - ver->dev_id = stmmac_get_dev_id(priv, version_offset); + ver->dev_id = stmmac_get_dev_id(priv, version); } static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) From 7b2e41fff76f53b53e7e00222674e3d87cbd4a67 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:26 +0000 Subject: [PATCH 367/867] net: stmmac: move stmmac_get_*id() into stmmac_get_version() Move the contents of both stmmac_get_id() and stmmac_get_dev_id() into stmmac_get_version() as it no longer makes sense for these to be separate functions. Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDtew-0000000CCC9-0KeM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index ffdc101ce3ce4..a4df51a7aef11 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -18,19 +18,6 @@ struct stmmac_version { u8 dev_id; }; -static u32 stmmac_get_id(struct stmmac_priv *priv, u32 reg) -{ - dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", - (unsigned int)(reg & GENMASK(15, 8)) >> 8, - (unsigned int)(reg & GENMASK(7, 0))); - return reg & GENMASK(7, 0); -} - -static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 reg) -{ - return (reg & GENMASK(15, 8)) >> 8; -} - static void stmmac_get_version(struct stmmac_priv *priv, struct stmmac_version *ver) { @@ -55,9 +42,13 @@ static void stmmac_get_version(struct stmmac_priv *priv, return; } - ver->snpsver = stmmac_get_id(priv, version); + dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", + (unsigned int)(version & 
GENMASK(15, 8)) >> 8, + (unsigned int)(version & GENMASK(7, 0))); + + ver->snpsver = version & GENMASK(7, 0); if (core_type == DWMAC_CORE_XGMAC) - ver->dev_id = stmmac_get_dev_id(priv, version); + ver->dev_id = (version & GENMASK(15, 8)) >> 8; } static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) From b2fe9e29b5f65aa5ad87e859966871061eb37303 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:31 +0000 Subject: [PATCH 368/867] net: stmmac: use FIELD_GET() for version register Provide field definitions in common.h, and use these with FIELD_GET() to extract the fields from the version register. Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDtf1-0000000CCCF-0uUV@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 +++ drivers/net/ethernet/stmicro/stmmac/hwif.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 553a8897b005b..27083af545683 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -26,6 +26,9 @@ #include "hwif.h" #include "mmc.h" +#define DWMAC_SNPSVER GENMASK_U32(7, 0) +#define DWMAC_USERVER GENMASK_U32(15, 8) + /* Synopsys Core versions */ #define DWMAC_CORE_3_40 0x34 #define DWMAC_CORE_3_50 0x35 diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index a4df51a7aef11..26cc1bc758bfb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -43,12 +43,12 @@ static void stmmac_get_version(struct stmmac_priv *priv, } dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", - (unsigned int)(version & GENMASK(15, 8)) >> 8, - (unsigned int)(version & GENMASK(7, 0))); + 
FIELD_GET(DWMAC_USERVER, version), + FIELD_GET(DWMAC_SNPSVER, version)); - ver->snpsver = version & GENMASK(7, 0); + ver->snpsver = FIELD_GET(DWMAC_SNPSVER, version); if (core_type == DWMAC_CORE_XGMAC) - ver->dev_id = (version & GENMASK(15, 8)) >> 8; + ver->dev_id = FIELD_GET(DWMAC_USERVER, version); } static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) From 7b510ea8e58eb0ead6bc41fd0f15ec064312dcbf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:36 +0000 Subject: [PATCH 369/867] net: stmmac: provide function to lookup hwif Provide a function to lookup the hwif entry given the core type, Synopsys version, and device ID (used for XGMAC cores). Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDtf6-0000000CCCL-1cQA@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 40 +++++++++++++++------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 26cc1bc758bfb..892cef79c4d10 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -299,6 +299,30 @@ static const struct stmmac_hwif_entry { }, }; +static const struct stmmac_hwif_entry * +stmmac_hwif_find(enum dwmac_core_type core_type, u8 snpsver, u8 dev_id) +{ + const struct stmmac_hwif_entry *entry; + int i; + + for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { + entry = &stmmac_hw[i]; + + if (core_type != entry->core_type) + continue; + /* Use synopsys_id var because some setups can override this */ + if (snpsver < entry->min_id) + continue; + if (core_type == DWMAC_CORE_XGMAC && + (dev_id ^ entry->dev_id)) + continue; + + return entry; + } + + return NULL; +} + int stmmac_hwif_init(struct stmmac_priv *priv) { enum dwmac_core_type core_type = priv->plat->core_type; @@ 
-306,7 +330,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv) struct stmmac_version version; struct mac_device_info *mac; bool needs_setup = true; - int i, ret; + int ret; stmmac_get_version(priv, &version); @@ -339,18 +363,10 @@ int stmmac_hwif_init(struct stmmac_priv *priv) spin_lock_init(&mac->irq_ctrl_lock); /* Fallback to generic HW */ - for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { - entry = &stmmac_hw[i]; - - if (core_type != entry->core_type) - continue; - /* Use synopsys_id var because some setups can override this */ - if (priv->synopsys_id < entry->min_id) - continue; - if (core_type == DWMAC_CORE_XGMAC && - (version.dev_id ^ entry->dev_id)) - continue; + /* Use synopsys_id var because some setups can override this */ + entry = stmmac_hwif_find(core_type, priv->synopsys_id, version.dev_id); + if (entry) { /* Only use generic HW helpers if needed */ mac->desc = mac->desc ? : entry->desc; mac->dma = mac->dma ? : entry->dma; From f9326b139b4c77128564574ea2d7b83c8a758114 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:41 +0000 Subject: [PATCH 370/867] net: stmmac: use != rather than ^ for comparing dev_id Use the more usual not-equals rather than exclusive-or operator when comparing the dev_id in stmmac_hwif_find(). 
Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDtfB-0000000CCCR-25rr@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 892cef79c4d10..e1f99b9d9d7f1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -314,7 +314,7 @@ stmmac_hwif_find(enum dwmac_core_type core_type, u8 snpsver, u8 dev_id) if (snpsver < entry->min_id) continue; if (core_type == DWMAC_CORE_XGMAC && - (dev_id ^ entry->dev_id)) + dev_id != entry->dev_id) continue; return entry; From 6436f408eb214bce9c5c308d51372a1611940dff Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Oct 2025 00:03:46 +0000 Subject: [PATCH 371/867] net: stmmac: reorganise stmmac_hwif_init() Reorganise stmmac_hwif_init() to handle the error case of stmmac_hwif_find() in the indented block, which follows the normal programming pattern. 
Reviewed-by: Andrew Lunn Tested-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vDtfG-0000000CCCX-2YwQ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 72 ++++++++++++---------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index e1f99b9d9d7f1..8212441f9826e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -366,41 +366,45 @@ int stmmac_hwif_init(struct stmmac_priv *priv) /* Use synopsys_id var because some setups can override this */ entry = stmmac_hwif_find(core_type, priv->synopsys_id, version.dev_id); - if (entry) { - /* Only use generic HW helpers if needed */ - mac->desc = mac->desc ? : entry->desc; - mac->dma = mac->dma ? : entry->dma; - mac->mac = mac->mac ? : entry->mac; - mac->ptp = mac->ptp ? : entry->hwtimestamp; - mac->mode = mac->mode ? : entry->mode; - mac->tc = mac->tc ? : entry->tc; - mac->mmc = mac->mmc ? : entry->mmc; - mac->est = mac->est ? : entry->est; - mac->vlan = mac->vlan ? 
: entry->vlan; - - priv->hw = mac; - priv->fpe_cfg.reg = entry->regs.fpe_reg; - priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; - priv->mmcaddr = priv->ioaddr + entry->regs.mmc_off; - memcpy(&priv->ptp_clock_ops, entry->ptp, - sizeof(struct ptp_clock_info)); - if (entry->est) - priv->estaddr = priv->ioaddr + entry->regs.est_off; - - /* Entry found */ - if (needs_setup) { - ret = entry->setup(priv); - if (ret) - return ret; - } + if (!entry) { + dev_err(priv->device, + "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", + version.snpsver, core_type == DWMAC_CORE_GMAC, + core_type == DWMAC_CORE_GMAC4); + + return -EINVAL; + } - /* Save quirks, if needed for posterior use */ - priv->hwif_quirks = entry->quirks; - return 0; + /* Only use generic HW helpers if needed */ + mac->desc = mac->desc ? : entry->desc; + mac->dma = mac->dma ? : entry->dma; + mac->mac = mac->mac ? : entry->mac; + mac->ptp = mac->ptp ? : entry->hwtimestamp; + mac->mode = mac->mode ? : entry->mode; + mac->tc = mac->tc ? : entry->tc; + mac->mmc = mac->mmc ? : entry->mmc; + mac->est = mac->est ? : entry->est; + mac->vlan = mac->vlan ? 
: entry->vlan; + + priv->hw = mac; + priv->fpe_cfg.reg = entry->regs.fpe_reg; + priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; + priv->mmcaddr = priv->ioaddr + entry->regs.mmc_off; + memcpy(&priv->ptp_clock_ops, entry->ptp, + sizeof(struct ptp_clock_info)); + + if (entry->est) + priv->estaddr = priv->ioaddr + entry->regs.est_off; + + /* Entry found */ + if (needs_setup) { + ret = entry->setup(priv); + if (ret) + return ret; } - dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", - version.snpsver, core_type == DWMAC_CORE_GMAC, - core_type == DWMAC_CORE_GMAC4); - return -EINVAL; + /* Save quirks, if needed for posterior use */ + priv->hwif_quirks = entry->quirks; + + return 0; } From afb8f6567a5b4bb4e673608048939fef854b8709 Mon Sep 17 00:00:00 2001 From: Ankit Khushwaha Date: Tue, 28 Oct 2025 22:59:47 +0530 Subject: [PATCH 372/867] selftest: net: fix socklen_t type mismatch in sctp_collision test Socket APIs like recvfrom(), accept(), and getsockname() expect socklen_t* arg, but tests were using int variables. This causes -Wpointer-sign warnings on platforms where socklen_t is unsigned. Change the variable type from int to socklen_t to resolve the warning and ensure type safety across platforms. 
warning fixed: sctp_collision.c:62:70: warning: passing 'int *' to parameter of type 'socklen_t *' (aka 'unsigned int *') converts between pointers to integer types with different sign [-Wpointer-sign] 62 | ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); | ^~~~ /usr/include/sys/socket.h:165:27: note: passing argument to parameter '__addr_len' here 165 | socklen_t *__restrict __addr_len); | ^ Reviewed-by: Muhammad Usama Anjum Signed-off-by: Ankit Khushwaha Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251028172947.53153-1-ankitkhushwaha.linux@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/netfilter/sctp_collision.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a856..b282d1785c9b0 100644 --- a/tools/testing/selftests/net/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -9,9 +9,10 @@ int main(int argc, char *argv[]) { struct sockaddr_in saddr = {}, daddr = {}; - int sd, ret, len = sizeof(daddr); + socklen_t len = sizeof(daddr); struct timeval tv = {25, 0}; char buf[] = "hello"; + int sd, ret; if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s \n", From b8a7826e4b1aab3fabb29cbf0b73da9993d356de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Oct 2025 03:58:23 +0000 Subject: [PATCH 373/867] net: sched: Don't use WARN_ON_ONCE() for -ENOMEM in tcf_classify(). As demonstrated by syzbot, WARN_ON_ONCE() in tcf_classify() can be easily triggered by fault injection. [0] We should not use WARN_ON_ONCE() for the simple -ENOMEM case. Also, we provide SKB_DROP_REASON_NOMEM for the same error. Let's remove WARN_ON_ONCE() there. [0]: FAULT_INJECTION: forcing a failure. 
name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 UID: 0 PID: 31392 Comm: syz.8.7081 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 Call Trace: dump_stack_lvl+0x189/0x250 should_fail_ex+0x414/0x560 should_failslab+0xa8/0x100 kmem_cache_alloc_noprof+0x74/0x6e0 skb_ext_add+0x148/0x8f0 tcf_classify+0xeba/0x1140 multiq_enqueue+0xfd/0x4c0 net/sched/sch_multiq.c:66 ... WARNING: CPU: 0 PID: 31392 at net/sched/cls_api.c:1869 tcf_classify+0xfd7/0x1140 Modules linked in: CPU: 0 UID: 0 PID: 31392 Comm: syz.8.7081 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 RIP: 0010:tcf_classify+0xfd7/0x1140 Code: e8 03 42 0f b6 04 30 84 c0 0f 85 41 01 00 00 66 41 89 1f eb 05 e8 89 26 75 f8 bb ff ff ff ff e9 04 f9 ff ff e8 7a 26 75 f8 90 <0f> 0b 90 49 83 c5 44 4c 89 eb 49 c1 ed 03 43 0f b6 44 35 00 84 c0 RSP: 0018:ffffc9000b7671f0 EFLAGS: 00010293 RAX: ffffffff894addf6 RBX: 0000000000000002 RCX: ffff888025029e40 RDX: 0000000000000000 RSI: ffffffff8bbf05c0 RDI: ffffffff8bbf0580 RBP: 0000000000000000 R08: 00000000ffffffff R09: 1ffffffff1c0bfd6 R10: dffffc0000000000 R11: fffffbfff1c0bfd7 R12: ffff88805a90de5c R13: ffff88805a90ddc0 R14: dffffc0000000000 R15: ffffc9000b7672c0 FS: 00007f20739f66c0(0000) GS:ffff88812613e000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000110c2d2a80 CR3: 0000000024e36000 CR4: 00000000003526f0 Call Trace: multiq_classify net/sched/sch_multiq.c:39 [inline] multiq_enqueue+0xfd/0x4c0 net/sched/sch_multiq.c:66 dev_qdisc_enqueue+0x4e/0x260 net/core/dev.c:4118 __dev_xmit_skb net/core/dev.c:4214 [inline] __dev_queue_xmit+0xe83/0x3b50 net/core/dev.c:4729 packet_snd net/packet/af_packet.c:3076 [inline] packet_sendmsg+0x3e33/0x5080 net/packet/af_packet.c:3108 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg+0x21c/0x270 net/socket.c:742 
____sys_sendmsg+0x505/0x830 net/socket.c:2630 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2684 __sys_sendmsg net/socket.c:2716 [inline] __do_sys_sendmsg net/socket.c:2721 [inline] __se_sys_sendmsg net/socket.c:2719 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2719 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f207578efc9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f20739f6038 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f20759e5fa0 RCX: 00007f207578efc9 RDX: 0000000000000004 RSI: 00002000000000c0 RDI: 0000000000000008 RBP: 00007f20739f6090 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 00007f20759e6038 R14: 00007f20759e5fa0 R15: 00007f2075b0fa28 Reported-by: syzbot+87e1289a044fcd0c5f62@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69003e33.050a0220.32483.00e8.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251028035859.2067690-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ecec0a1e1c1a0..f751cd5eeac8d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1866,7 +1866,7 @@ int tcf_classify(struct sk_buff *skb, struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) { + if (!ext) { tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } From f0e7036fc9cb08bdfb27d64eee7fc003ba0bc2e5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 27 Oct 2025 10:22:30 +0200 Subject: [PATCH 374/867] ipv4: icmp: Add 
RFC 5837 support Add the ability to append the incoming IP interface information to ICMPv4 error messages in accordance with RFC 5837 and RFC 4884. This is required for more meaningful traceroute results in unnumbered networks. The feature is disabled by default and controlled via a new sysctl ("net.ipv4.icmp_errors_extension_mask") which accepts a bitmask of ICMP extensions to append to ICMP error messages. Currently, only a single value is supported, but the interface and the implementation should be able to support more extensions, if needed. Clone the skb and copy the relevant data portions before modifying the skb as the caller of __icmp_send() still owns the skb after the function returns. This should be fine since by default ICMP error messages are rate limited to 1000 per second and no more than 1 per second per specific host. Trim or pad the packet to 128 bytes before appending the ICMP extension structure in order to be compatible with legacy applications that assume that the ICMP extension structure always starts at this offset (the minimum length specified by RFC 4884). 
Reviewed-by: Petr Machata Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251027082232.232571-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 17 +++ include/linux/icmp.h | 32 +++++ include/net/netns/ipv4.h | 1 + net/ipv4/icmp.c | 191 ++++++++++++++++++++++++- net/ipv4/sysctl_net_ipv4.c | 11 ++ 5 files changed, 251 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a06cb99d66dcd..ece1187ba0f15 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1796,6 +1796,23 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN Default: 0 (disabled) +icmp_errors_extension_mask - UNSIGNED INTEGER + Bitmask of ICMP extensions to append to ICMPv4 error messages + ("Destination Unreachable", "Time Exceeded" and "Parameter Problem"). + The original datagram is trimmed / padded to 128 bytes in order to be + compatible with applications that do not comply with RFC 4884. + + Possible extensions are: + + ==== ============================================================== + 0x01 Incoming IP interface information according to RFC 5837. + Extension will include the index, IPv4 address (if present), + name and MTU of the IP interface that received the datagram + which elicited the ICMP error. + ==== ============================================================== + + Default: 0x00 (no extensions) + igmp_max_memberships - INTEGER Change the maximum number of multicast groups we can subscribe to. 
Default: 20 diff --git a/include/linux/icmp.h b/include/linux/icmp.h index 0af4d210ee315..043ec5d9c8821 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -40,4 +40,36 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb, struct sock_ee_data_rfc4884 *out, int thlen, int off); +/* RFC 4884 */ +#define ICMP_EXT_ORIG_DGRAM_MIN_LEN 128 +#define ICMP_EXT_VERSION_2 2 + +/* ICMP Extension Object Classes */ +#define ICMP_EXT_OBJ_CLASS_IIO 2 /* RFC 5837 */ + +/* Interface Information Object - RFC 5837 */ +enum { + ICMP_EXT_CTYPE_IIO_ROLE_IIF, +}; + +#define ICMP_EXT_CTYPE_IIO_ROLE(ROLE) ((ROLE) << 6) +#define ICMP_EXT_CTYPE_IIO_MTU BIT(0) +#define ICMP_EXT_CTYPE_IIO_NAME BIT(1) +#define ICMP_EXT_CTYPE_IIO_IPADDR BIT(2) +#define ICMP_EXT_CTYPE_IIO_IFINDEX BIT(3) + +struct icmp_ext_iio_name_subobj { + u8 len; + char name[IFNAMSIZ]; +}; + +enum { + /* RFC 5837 - Incoming IP Interface Role */ + ICMP_ERR_EXT_IIO_IIF, + /* Add new constants above. Used by "icmp_errors_extension_mask" + * sysctl. 
+ */ + ICMP_ERR_EXT_COUNT, +}; + #endif /* _LINUX_ICMP_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 34eb3aecb3f21..0e96c90e56c6d 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,7 @@ struct netns_ipv4 { u8 sysctl_icmp_echo_ignore_broadcasts; u8 sysctl_icmp_ignore_bogus_error_responses; u8 sysctl_icmp_errors_use_inbound_ifaddr; + u8 sysctl_icmp_errors_extension_mask; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_msgs_per_sec; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1b7fb5d935edf..4abbec2f47ef5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -582,6 +582,185 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, return ERR_PTR(err); } +struct icmp_ext_iio_addr4_subobj { + __be16 afi; + __be16 reserved; + __be32 addr4; +}; + +static unsigned int icmp_ext_iio_len(void) +{ + return sizeof(struct icmp_extobj_hdr) + + /* ifIndex */ + sizeof(__be32) + + /* Interface Address Sub-Object */ + sizeof(struct icmp_ext_iio_addr4_subobj) + + /* Interface Name Sub-Object. Length must be a multiple of 4 + * bytes. + */ + ALIGN(sizeof(struct icmp_ext_iio_name_subobj), 4) + + /* MTU */ + sizeof(__be32); +} + +static unsigned int icmp_ext_max_len(u8 ext_objs) +{ + unsigned int ext_max_len; + + ext_max_len = sizeof(struct icmp_ext_hdr); + + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + ext_max_len += icmp_ext_iio_len(); + + return ext_max_len; +} + +static __be32 icmp_ext_iio_addr4_find(const struct net_device *dev) +{ + struct in_device *in_dev; + struct in_ifaddr *ifa; + + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + return 0; + + /* It is unclear from RFC 5837 which IP address should be chosen, but + * it makes sense to choose a global unicast address. 
+ */ + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) + continue; + if (ifa->ifa_scope != RT_SCOPE_UNIVERSE || + ipv4_is_multicast(ifa->ifa_address)) + continue; + return ifa->ifa_address; + } + + return 0; +} + +static void icmp_ext_iio_iif_append(struct net *net, struct sk_buff *skb, + int iif) +{ + struct icmp_ext_iio_name_subobj *name_subobj; + struct icmp_extobj_hdr *objh; + struct net_device *dev; + __be32 data; + + if (!iif) + return; + + /* Add the fields in the order specified by RFC 5837. */ + objh = skb_put(skb, sizeof(*objh)); + objh->class_num = ICMP_EXT_OBJ_CLASS_IIO; + objh->class_type = ICMP_EXT_CTYPE_IIO_ROLE(ICMP_EXT_CTYPE_IIO_ROLE_IIF); + + data = htonl(iif); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_IFINDEX; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, iif); + if (!dev) + goto out; + + data = icmp_ext_iio_addr4_find(dev); + if (data) { + struct icmp_ext_iio_addr4_subobj *addr4_subobj; + + addr4_subobj = skb_put_zero(skb, sizeof(*addr4_subobj)); + addr4_subobj->afi = htons(ICMP_AFI_IP); + addr4_subobj->addr4 = data; + objh->class_type |= ICMP_EXT_CTYPE_IIO_IPADDR; + } + + name_subobj = skb_put_zero(skb, ALIGN(sizeof(*name_subobj), 4)); + name_subobj->len = ALIGN(sizeof(*name_subobj), 4); + netdev_copy_name(dev, name_subobj->name); + objh->class_type |= ICMP_EXT_CTYPE_IIO_NAME; + + data = htonl(READ_ONCE(dev->mtu)); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_MTU; + +out: + rcu_read_unlock(); + objh->length = htons(skb_tail_pointer(skb) - (unsigned char *)objh); +} + +static void icmp_ext_objs_append(struct net *net, struct sk_buff *skb, + u8 ext_objs, int iif) +{ + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + icmp_ext_iio_iif_append(net, skb, iif); +} + +static struct sk_buff * +icmp_ext_append(struct net *net, struct sk_buff *skb_in, struct icmphdr *icmph, + unsigned int room, int iif) +{ + unsigned int 
payload_len, ext_max_len, ext_len; + struct icmp_ext_hdr *ext_hdr; + struct sk_buff *skb; + u8 ext_objs; + int nhoff; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return NULL; + } + + ext_objs = READ_ONCE(net->ipv4.sysctl_icmp_errors_extension_mask); + if (!ext_objs) + return NULL; + + ext_max_len = icmp_ext_max_len(ext_objs); + if (ICMP_EXT_ORIG_DGRAM_MIN_LEN + ext_max_len > room) + return NULL; + + skb = skb_clone(skb_in, GFP_ATOMIC); + if (!skb) + return NULL; + + nhoff = skb_network_offset(skb); + payload_len = min(skb->len - nhoff, ICMP_EXT_ORIG_DGRAM_MIN_LEN); + + if (!pskb_network_may_pull(skb, payload_len)) + goto free_skb; + + if (pskb_trim(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN) || + __skb_put_padto(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN, false)) + goto free_skb; + + if (pskb_expand_head(skb, 0, ext_max_len, GFP_ATOMIC)) + goto free_skb; + + ext_hdr = skb_put_zero(skb, sizeof(*ext_hdr)); + ext_hdr->version = ICMP_EXT_VERSION_2; + + icmp_ext_objs_append(net, skb, ext_objs, iif); + + /* Do not send an empty extension structure. */ + ext_len = skb_tail_pointer(skb) - (unsigned char *)ext_hdr; + if (ext_len == sizeof(*ext_hdr)) + goto free_skb; + + ext_hdr->checksum = ip_compute_csum(ext_hdr, ext_len); + /* The length of the original datagram in 32-bit words (RFC 4884). 
*/ + icmph->un.reserved[1] = ICMP_EXT_ORIG_DGRAM_MIN_LEN / sizeof(u32); + + return skb; + +free_skb: + consume_skb(skb); + return NULL; +} + /* * Send an ICMP message in response to a situation * @@ -601,6 +780,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, struct icmp_bxm icmp_param; struct rtable *rt = skb_rtable(skb_in); bool apply_ratelimit = false; + struct sk_buff *ext_skb; struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; @@ -770,7 +950,12 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (room <= (int)sizeof(struct iphdr)) goto ende; - icmp_param.data_len = skb_in->len - icmp_param.offset; + ext_skb = icmp_ext_append(net, skb_in, &icmp_param.data.icmph, room, + parm->iif); + if (ext_skb) + icmp_param.skb = ext_skb; + + icmp_param.data_len = icmp_param.skb->len - icmp_param.offset; if (icmp_param.data_len > room) icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); @@ -785,6 +970,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, trace_icmp_send(skb_in, type, code); icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt); + + if (ext_skb) + consume_skb(ext_skb); ende: ip_rt_put(rt); out_unlock: @@ -1502,6 +1690,7 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.sysctl_icmp_ratelimit = 1 * HZ; net->ipv4.sysctl_icmp_ratemask = 0x1818; net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; + net->ipv4.sysctl_icmp_errors_extension_mask = 0; net->ipv4.sysctl_icmp_msgs_per_sec = 1000; net->ipv4.sysctl_icmp_msgs_burst = 50; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 24dbc603cc44d..0c7c8f9041cbf 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,8 @@ static int tcp_plb_max_rounds = 31; static int tcp_plb_max_cong_thresh = 256; static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC; static int tcp_ecn_mode_max = 2; +static u32 icmp_errors_extension_mask_all = 
+ GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0); /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -674,6 +676,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, + { + .procname = "icmp_errors_extension_mask", + .data = &init_net.ipv4.sysctl_icmp_errors_extension_mask, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &icmp_errors_extension_mask_all, + }, { .procname = "icmp_ratelimit", .data = &init_net.ipv4.sysctl_icmp_ratelimit, From d12d04d221f8d928a27a66236228e7501cd4cad5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 27 Oct 2025 10:22:31 +0200 Subject: [PATCH 375/867] ipv6: icmp: Add RFC 5837 support Add the ability to append the incoming IP interface information to ICMPv6 error messages in accordance with RFC 5837 and RFC 4884. This is required for more meaningful traceroute results in unnumbered networks. The feature is disabled by default and controlled via a new sysctl ("net.ipv6.icmp.errors_extension_mask") which accepts a bitmask of ICMP extensions to append to ICMP error messages. Currently, only a single value is supported, but the interface and the implementation should be able to support more extensions, if needed. Clone the skb and copy the relevant data portions before modifying the skb as the caller of icmp6_send() still owns the skb after the function returns. This should be fine since by default ICMP error messages are rate limited to 1000 per second and no more than 1 per second per specific host. Trim or pad the packet to 128 bytes before appending the ICMP extension structure in order to be compatible with legacy applications that assume that the ICMP extension structure always starts at this offset (the minimum length specified by RFC 4884). 
Since commit 20e1954fe238 ("ipv6: RFC 4884 partial support for SIT/GRE tunnels") it is possible for icmp6_send() to be called with an skb that already contains ICMP extensions. This can happen when we receive an ICMPv4 message with extensions from a tunnel and translate it to an ICMPv6 message towards an IPv6 host in the overlay network. I could not find an RFC that supports this behavior, but it makes sense to not overwrite the original extensions that were appended to the packet. Therefore, avoid appending extensions if the length field in the provided ICMPv6 header is already filled. Export netdev_copy_name() using EXPORT_IPV6_MOD_GPL() to make it available to IPv6 when it is built as a module. Reviewed-by: Petr Machata Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251027082232.232571-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 17 ++ include/net/netns/ipv6.h | 1 + net/core/dev.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/icmp.c | 214 ++++++++++++++++++++++++- 5 files changed, 232 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index ece1187ba0f15..7cd35bfd39e68 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -3279,6 +3279,23 @@ error_anycast_as_unicast - BOOLEAN Default: 0 (disabled) +errors_extension_mask - UNSIGNED INTEGER + Bitmask of ICMP extensions to append to ICMPv6 error messages + ("Destination Unreachable" and "Time Exceeded"). The original datagram + is trimmed / padded to 128 bytes in order to be compatible with + applications that do not comply with RFC 4884. + + Possible extensions are: + + ==== ============================================================== + 0x01 Incoming IP interface information according to RFC 5837. 
+ Extension will include the index, IPv6 address (if present), + name and MTU of the IP interface that received the datagram + which elicited the ICMP error. + ==== ============================================================== + + Default: 0x00 (no extensions) + xfrm6_gc_thresh - INTEGER (Obsolete since linux-4.14) The threshold at which we will start garbage collecting for IPv6 diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 47dc70d8100a4..08d2ecc96e2b4 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -56,6 +56,7 @@ struct netns_sysctl_ipv6 { u8 skip_notify_on_dev_down; u8 fib_notify_on_flag_change; u8 icmpv6_error_anycast_as_unicast; + u8 icmpv6_errors_extension_mask; }; struct netns_ipv6 { diff --git a/net/core/dev.c b/net/core/dev.c index d32f0b0c03bbd..dccc1176f3c65 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1163,6 +1163,7 @@ void netdev_copy_name(struct net_device *dev, char *name) strscpy(name, dev->name, IFNAMSIZ); } while (read_seqretry(&netdev_rename_lock, seq)); } +EXPORT_IPV6_MOD_GPL(netdev_copy_name); /** * netdev_get_name - get a netdevice name, knowing its ifindex. diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 1b0314644e0cc..44d7de1eec4f7 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -960,6 +960,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0; + net->ipv6.sysctl.icmpv6_errors_extension_mask = 0; /* By default, rate limit error messages. * Except for pmtu discovery, it would break it. 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 56c974cf75d15..5d2f90babaa5f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -444,6 +444,193 @@ static int icmp6_iif(const struct sk_buff *skb) return icmp6_dev(skb)->ifindex; } +struct icmp6_ext_iio_addr6_subobj { + __be16 afi; + __be16 reserved; + struct in6_addr addr6; +}; + +static unsigned int icmp6_ext_iio_len(void) +{ + return sizeof(struct icmp_extobj_hdr) + + /* ifIndex */ + sizeof(__be32) + + /* Interface Address Sub-Object */ + sizeof(struct icmp6_ext_iio_addr6_subobj) + + /* Interface Name Sub-Object. Length must be a multiple of 4 + * bytes. + */ + ALIGN(sizeof(struct icmp_ext_iio_name_subobj), 4) + + /* MTU */ + sizeof(__be32); +} + +static unsigned int icmp6_ext_max_len(u8 ext_objs) +{ + unsigned int ext_max_len; + + ext_max_len = sizeof(struct icmp_ext_hdr); + + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + ext_max_len += icmp6_ext_iio_len(); + + return ext_max_len; +} + +static struct in6_addr *icmp6_ext_iio_addr6_find(const struct net_device *dev) +{ + struct inet6_dev *in6_dev; + struct inet6_ifaddr *ifa; + + in6_dev = __in6_dev_get(dev); + if (!in6_dev) + return NULL; + + /* It is unclear from RFC 5837 which IP address should be chosen, but + * it makes sense to choose a global unicast address. + */ + list_for_each_entry_rcu(ifa, &in6_dev->addr_list, if_list) { + if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DADFAILED)) + continue; + if (ipv6_addr_type(&ifa->addr) != IPV6_ADDR_UNICAST || + ipv6_addr_src_scope(&ifa->addr) != IPV6_ADDR_SCOPE_GLOBAL) + continue; + return &ifa->addr; + } + + return NULL; +} + +static void icmp6_ext_iio_iif_append(struct net *net, struct sk_buff *skb, + int iif) +{ + struct icmp_ext_iio_name_subobj *name_subobj; + struct icmp_extobj_hdr *objh; + struct net_device *dev; + struct in6_addr *addr6; + __be32 data; + + if (!iif) + return; + + /* Add the fields in the order specified by RFC 5837. 
*/ + objh = skb_put(skb, sizeof(*objh)); + objh->class_num = ICMP_EXT_OBJ_CLASS_IIO; + objh->class_type = ICMP_EXT_CTYPE_IIO_ROLE(ICMP_EXT_CTYPE_IIO_ROLE_IIF); + + data = htonl(iif); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_IFINDEX; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, iif); + if (!dev) + goto out; + + addr6 = icmp6_ext_iio_addr6_find(dev); + if (addr6) { + struct icmp6_ext_iio_addr6_subobj *addr6_subobj; + + addr6_subobj = skb_put_zero(skb, sizeof(*addr6_subobj)); + addr6_subobj->afi = htons(ICMP_AFI_IP6); + addr6_subobj->addr6 = *addr6; + objh->class_type |= ICMP_EXT_CTYPE_IIO_IPADDR; + } + + name_subobj = skb_put_zero(skb, ALIGN(sizeof(*name_subobj), 4)); + name_subobj->len = ALIGN(sizeof(*name_subobj), 4); + netdev_copy_name(dev, name_subobj->name); + objh->class_type |= ICMP_EXT_CTYPE_IIO_NAME; + + data = htonl(READ_ONCE(dev->mtu)); + skb_put_data(skb, &data, sizeof(__be32)); + objh->class_type |= ICMP_EXT_CTYPE_IIO_MTU; + +out: + rcu_read_unlock(); + objh->length = htons(skb_tail_pointer(skb) - (unsigned char *)objh); +} + +static void icmp6_ext_objs_append(struct net *net, struct sk_buff *skb, + u8 ext_objs, int iif) +{ + if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) + icmp6_ext_iio_iif_append(net, skb, iif); +} + +static struct sk_buff * +icmp6_ext_append(struct net *net, struct sk_buff *skb_in, + struct icmp6hdr *icmp6h, unsigned int room, int iif) +{ + unsigned int payload_len, ext_max_len, ext_len; + struct icmp_ext_hdr *ext_hdr; + struct sk_buff *skb; + u8 ext_objs; + int nhoff; + + switch (icmp6h->icmp6_type) { + case ICMPV6_DEST_UNREACH: + case ICMPV6_TIME_EXCEED: + break; + default: + return NULL; + } + + /* Do not overwrite existing extensions. This can happen when we + * receive an ICMPv4 message with extensions from a tunnel and + * translate it to an ICMPv6 message towards an IPv6 host in the + * overlay network. 
+ */ + if (icmp6h->icmp6_datagram_len) + return NULL; + + ext_objs = READ_ONCE(net->ipv6.sysctl.icmpv6_errors_extension_mask); + if (!ext_objs) + return NULL; + + ext_max_len = icmp6_ext_max_len(ext_objs); + if (ICMP_EXT_ORIG_DGRAM_MIN_LEN + ext_max_len > room) + return NULL; + + skb = skb_clone(skb_in, GFP_ATOMIC); + if (!skb) + return NULL; + + nhoff = skb_network_offset(skb); + payload_len = min(skb->len - nhoff, ICMP_EXT_ORIG_DGRAM_MIN_LEN); + + if (!pskb_network_may_pull(skb, payload_len)) + goto free_skb; + + if (pskb_trim(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN) || + __skb_put_padto(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN, false)) + goto free_skb; + + if (pskb_expand_head(skb, 0, ext_max_len, GFP_ATOMIC)) + goto free_skb; + + ext_hdr = skb_put_zero(skb, sizeof(*ext_hdr)); + ext_hdr->version = ICMP_EXT_VERSION_2; + + icmp6_ext_objs_append(net, skb, ext_objs, iif); + + /* Do not send an empty extension structure. */ + ext_len = skb_tail_pointer(skb) - (unsigned char *)ext_hdr; + if (ext_len == sizeof(*ext_hdr)) + goto free_skb; + + ext_hdr->checksum = ip_compute_csum(ext_hdr, ext_len); + /* The length of the original datagram in 64-bit words (RFC 4884). 
*/ + icmp6h->icmp6_datagram_len = ICMP_EXT_ORIG_DGRAM_MIN_LEN / sizeof(u64); + + return skb; + +free_skb: + consume_skb(skb); + return NULL; +} + /* * Send an ICMP message in response to a packet in error */ @@ -458,7 +645,9 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; bool apply_ratelimit = false; + struct sk_buff *ext_skb; struct dst_entry *dst; + unsigned int room; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; @@ -612,8 +801,13 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, msg.offset = skb_network_offset(skb); msg.type = type; - len = skb->len - msg.offset; - len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); + room = IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr); + ext_skb = icmp6_ext_append(net, skb, &tmp_hdr, room, parm->iif); + if (ext_skb) + msg.skb = ext_skb; + + len = msg.skb->len - msg.offset; + len = min_t(unsigned int, len, room); if (len < 0) { net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); @@ -635,6 +829,8 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, } out_dst_release: + if (ext_skb) + consume_skb(ext_skb); dst_release(dst); out_unlock: icmpv6_xmit_unlock(sk); @@ -1171,6 +1367,10 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL + +static u32 icmpv6_errors_extension_mask_all = + GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0); + static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", @@ -1216,6 +1416,15 @@ static struct ctl_table ipv6_icmp_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "errors_extension_mask", + .data = &init_net.ipv6.sysctl.icmpv6_errors_extension_mask, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 
= &icmpv6_errors_extension_mask_all, + }, }; struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) @@ -1233,6 +1442,7 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr; table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast; + table[6].data = &net->ipv6.sysctl.icmpv6_errors_extension_mask; } return table; } From 02da595751833a272ce0e30438a544a77eb7c103 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 27 Oct 2025 10:22:32 +0200 Subject: [PATCH 376/867] selftests: traceroute: Add ICMP extensions tests Test that ICMP extensions are reported correctly when enabled and not reported when disabled. Test both IPv4 and IPv6 and using different packet sizes, to make sure trimming / padding works correctly. Disable ICMP rate limiting (defaults to 1 per-second per-target) so that the kernel will always generate ICMP errors when needed. 
Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20251027082232.232571-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/traceroute.sh | 313 ++++++++++++++++++++++ 1 file changed, 313 insertions(+) diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index dbb34c7e09ce5..a7c6ab8a03475 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -36,6 +36,35 @@ run_cmd() return $rc } +__check_traceroute_version() +{ + local cmd=$1; shift + local req_ver=$1; shift + local ver + + req_ver=$(echo "$req_ver" | sed 's/\.//g') + ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g') + if [[ $ver -lt $req_ver ]]; then + return 1 + else + return 0 + fi +} + +check_traceroute6_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute6 "$req_ver" +} + +check_traceroute_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute "$req_ver" +} + ################################################################################ # create namespaces and interconnects @@ -59,6 +88,8 @@ create_ns() ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0 + ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 @@ -297,6 +328,144 @@ run_traceroute6_vrf() cleanup_traceroute6_vrf } +################################################################################ +# traceroute6 with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly 
reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute6_ext() +{ + cleanup_all_ns +} + +setup_traceroute6_ext() +{ + # Start clean + cleanup_traceroute6_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 2001:db8:1::1/128 dev lo + ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 2001:db8:1::2/128 dev lo + ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1 + ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 2001:db8:1::3/128 dev lo + ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1 +} + +traceroute6_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? 
"Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1" + + # Change name and MTU and make sure the result is still correct. + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" +} + +run_traceroute6_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! 
check_traceroute6_version 2.1.5; then + log_test_skip "traceroute6 too old, missing ICMP extensions support" + return + fi + + setup_traceroute6_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute6_ext_iio_iif_test + traceroute6_ext_iio_iif_test 127 + traceroute6_ext_iio_iif_test 128 + traceroute6_ext_iio_iif_test 129 + + log_test "IPv6 traceroute with ICMP extensions" + + cleanup_traceroute6_ext +} + ################################################################################ # traceroute test # @@ -437,6 +606,147 @@ run_traceroute_vrf() cleanup_traceroute_vrf } +################################################################################ +# traceroute with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. 
+ +cleanup_traceroute_ext() +{ + cleanup_all_ns +} + +setup_traceroute_ext() +{ + # Start clean + cleanup_traceroute_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 192.0.2.1/32 dev lo + ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 192.0.2.2/32 dev lo + ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1 + ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 192.0.2.3/32 dev lo + ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1 +} + +traceroute_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. 
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1" + + # Change name and MTU and make sure the result is still correct. + # Re-add the route towards H1 since it was deleted when we removed the + # last IPv4 address from eth1 on R1. + run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1" + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep ''" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" +} + +run_traceroute_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute_version 2.1.5; then + log_test_skip "traceroute too old, missing ICMP extensions support" + return + fi + + setup_traceroute_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\"" + check_err $? 
"ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute_ext_iio_iif_test + traceroute_ext_iio_iif_test 127 + traceroute_ext_iio_iif_test 128 + traceroute_ext_iio_iif_test 129 + + log_test "IPv4 traceroute with ICMP extensions" + + cleanup_traceroute_ext +} + ################################################################################ # Run tests @@ -444,8 +754,10 @@ run_tests() { run_traceroute6 run_traceroute6_vrf + run_traceroute6_ext run_traceroute run_traceroute_vrf + run_traceroute_ext } ################################################################################ @@ -462,6 +774,7 @@ done require_command traceroute6 require_command traceroute +require_command jq run_tests From 3a85ec37bc11e77bda8b1cba34501ef3308200b0 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 29 Oct 2025 09:38:55 +0800 Subject: [PATCH 377/867] dt-bindings: net: netc-blk-ctrl: add compatible string for i.MX94 platforms Add the compatible string "nxp,imx94-netc-blk-ctrl" for i.MX94 platforms. 
Signed-off-by: Wei Fang Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251029013900.407583-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml b/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml index 97389fd5dbbfa..deea4fd73d767 100644 --- a/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml +++ b/Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml @@ -21,6 +21,7 @@ maintainers: properties: compatible: enum: + - nxp,imx94-netc-blk-ctrl - nxp,imx95-netc-blk-ctrl reg: From c4430f2ac0475e3525d9b2d0550e4421af54d7e9 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 29 Oct 2025 09:38:56 +0800 Subject: [PATCH 378/867] dt-bindings: net: enetc: add compatible string for ENETC with pseudo MAC The ENETC with pseudo MAC is used to connect to the CPU port of the NETC switch. This ENETC has a different PCI device ID, so add a standard PCI device compatible string to it. 
Signed-off-by: Wei Fang Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251029013900.407583-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/fsl,enetc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/fsl,enetc.yaml b/Documentation/devicetree/bindings/net/fsl,enetc.yaml index ca70f00501711..aac20ab72ace7 100644 --- a/Documentation/devicetree/bindings/net/fsl,enetc.yaml +++ b/Documentation/devicetree/bindings/net/fsl,enetc.yaml @@ -27,6 +27,7 @@ properties: - const: fsl,enetc - enum: - pci1131,e101 + - pci1131,e110 reg: maxItems: 1 From ba5d7d45ce8eec93fafd171da110386f97ecfed4 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 29 Oct 2025 09:38:57 +0800 Subject: [PATCH 379/867] net: enetc: add preliminary i.MX94 NETC blocks control support NETC blocks control is used for warm reset and pre-boot initialization. Different versions of NETC blocks control are not exactly the same. We need to add corresponding netc_devinfo data for each version. The i.MX94 series was launched after i.MX95, so its NETC version (v4.3) is higher than i.MX95 NETC (v4.1). Currently, the patch adds the following configurations for ENETCs. 1. Set the link's MII protocol. 2. ENETC 0 (MAC 3) and the switch port 2 (MAC 2) share the same parallel interface, but due to an SoC constraint, they cannot be used simultaneously. Since the switch is not supported yet, the interface is assigned to ENETC 0 by default. The switch configuration will be added separately in a subsequent patch. 
Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20251029013900.407583-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/freescale/enetc/netc_blk_ctrl.c | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c index bcb8eefeb93c0..5978ea096e80f 100644 --- a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c +++ b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c @@ -47,6 +47,13 @@ #define PCS_PROT_SFI BIT(4) #define PCS_PROT_10G_SXGMII BIT(6) +#define IMX94_EXT_PIN_CONTROL 0x10 +#define MAC2_MAC3_SEL BIT(1) + +#define IMX94_NETC_LINK_CFG(a) (0x4c + (a) * 4) +#define NETC_LINK_CFG_MII_PROT GENMASK(3, 0) +#define NETC_LINK_CFG_IO_VAR GENMASK(19, 16) + /* NETC privileged register block register */ #define PRB_NETCRR 0x100 #define NETCRR_SR BIT(0) @@ -68,6 +75,13 @@ #define IMX95_ENETC1_BUS_DEVFN 0x40 #define IMX95_ENETC2_BUS_DEVFN 0x80 +#define IMX94_ENETC0_BUS_DEVFN 0x100 +#define IMX94_ENETC1_BUS_DEVFN 0x140 +#define IMX94_ENETC2_BUS_DEVFN 0x180 +#define IMX94_ENETC0_LINK 3 +#define IMX94_ENETC1_LINK 4 +#define IMX94_ENETC2_LINK 5 + /* Flags for different platforms */ #define NETC_HAS_NETCMIX BIT(0) @@ -192,6 +206,90 @@ static int imx95_netcmix_init(struct platform_device *pdev) return 0; } +static int imx94_enetc_get_link_id(struct device_node *np) +{ + int bus_devfn = netc_of_pci_get_bus_devfn(np); + + /* Parse ENETC link number */ + switch (bus_devfn) { + case IMX94_ENETC0_BUS_DEVFN: + return IMX94_ENETC0_LINK; + case IMX94_ENETC1_BUS_DEVFN: + return IMX94_ENETC1_LINK; + case IMX94_ENETC2_BUS_DEVFN: + return IMX94_ENETC2_LINK; + default: + return -EINVAL; + } +} + +static int imx94_link_config(struct netc_blk_ctrl *priv, + struct device_node *np, int link_id) +{ + phy_interface_t interface; + int mii_proto; + u32 val; + + /* The node may be disabled and does not have a 'phy-mode' + * or 
'phy-connection-type' property. + */ + if (of_get_phy_mode(np, &interface)) + return 0; + + mii_proto = netc_get_link_mii_protocol(interface); + if (mii_proto < 0) + return mii_proto; + + val = mii_proto & NETC_LINK_CFG_MII_PROT; + if (val == MII_PROT_SERIAL) + val = u32_replace_bits(val, IO_VAR_16FF_16G_SERDES, + NETC_LINK_CFG_IO_VAR); + + netc_reg_write(priv->netcmix, IMX94_NETC_LINK_CFG(link_id), val); + + return 0; +} + +static int imx94_enetc_link_config(struct netc_blk_ctrl *priv, + struct device_node *np) +{ + int link_id = imx94_enetc_get_link_id(np); + + if (link_id < 0) + return link_id; + + return imx94_link_config(priv, np, link_id); +} + +static int imx94_netcmix_init(struct platform_device *pdev) +{ + struct netc_blk_ctrl *priv = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + u32 val; + int err; + + for_each_child_of_node_scoped(np, child) { + for_each_child_of_node_scoped(child, gchild) { + if (!of_device_is_compatible(gchild, "pci1131,e101")) + continue; + + err = imx94_enetc_link_config(priv, gchild); + if (err) + return err; + } + } + + /* ENETC 0 and switch port 2 share the same parallel interface. + * Currently, the switch is not supported, so this interface is + * used by ENETC 0 by default. 
+ */ + val = netc_reg_read(priv->netcmix, IMX94_EXT_PIN_CONTROL); + val |= MAC2_MAC3_SEL; + netc_reg_write(priv->netcmix, IMX94_EXT_PIN_CONTROL, val); + + return 0; +} + static bool netc_ierb_is_locked(struct netc_blk_ctrl *priv) { return !!(netc_reg_read(priv->prb, PRB_NETCRR) & NETCRR_LOCK); @@ -340,8 +438,14 @@ static const struct netc_devinfo imx95_devinfo = { .ierb_init = imx95_ierb_init, }; +static const struct netc_devinfo imx94_devinfo = { + .flags = NETC_HAS_NETCMIX, + .netcmix_init = imx94_netcmix_init, +}; + static const struct of_device_id netc_blk_ctrl_match[] = { { .compatible = "nxp,imx95-netc-blk-ctrl", .data = &imx95_devinfo }, + { .compatible = "nxp,imx94-netc-blk-ctrl", .data = &imx94_devinfo }, {}, }; MODULE_DEVICE_TABLE(of, netc_blk_ctrl_match); From 1cd3f21c18c293392a8b4abcb1f0a51d7a9efe8c Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Wed, 29 Oct 2025 09:38:58 +0800 Subject: [PATCH 380/867] net: enetc: add ptp timer binding support for i.MX94 The i.MX94 has three PTP timers, and all standalone ENETCs can select one of them to bind to as their PHC. The 'ptp-timer' property is used to represent the PTP device of the Ethernet controller. So users can add 'ptp-timer' to the ENETC node to specify the PTP timer. The driver parses this property to bind the two hardware devices. If the "ptp-timer" property is not present, the first timer of the PCIe bus where the ENETC is located is used as the default bound PTP timer. 
Signed-off-by: Clark Wang Signed-off-by: Wei Fang Link: https://patch.msgid.link/20251029013900.407583-5-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/freescale/enetc/netc_blk_ctrl.c | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c index 5978ea096e80f..d7aee3c934d3a 100644 --- a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c +++ b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c @@ -66,6 +66,7 @@ /* NETC integrated endpoint register block register */ #define IERB_EMDIOFAUXR 0x344 #define IERB_T0FAUXR 0x444 +#define IERB_ETBCR(a) (0x300c + 0x100 * (a)) #define IERB_EFAUXR(a) (0x3044 + 0x100 * (a)) #define IERB_VFAUXR(a) (0x4004 + 0x40 * (a)) #define FAUXR_LDID GENMASK(3, 0) @@ -78,10 +79,16 @@ #define IMX94_ENETC0_BUS_DEVFN 0x100 #define IMX94_ENETC1_BUS_DEVFN 0x140 #define IMX94_ENETC2_BUS_DEVFN 0x180 +#define IMX94_TIMER0_BUS_DEVFN 0x1 +#define IMX94_TIMER1_BUS_DEVFN 0x101 +#define IMX94_TIMER2_BUS_DEVFN 0x181 #define IMX94_ENETC0_LINK 3 #define IMX94_ENETC1_LINK 4 #define IMX94_ENETC2_LINK 5 +#define NETC_ENETC_ID(a) (a) +#define NETC_TIMER_ID(a) (a) + /* Flags for different platforms */ #define NETC_HAS_NETCMIX BIT(0) @@ -345,6 +352,98 @@ static int imx95_ierb_init(struct platform_device *pdev) return 0; } +static int imx94_get_enetc_id(struct device_node *np) +{ + int bus_devfn = netc_of_pci_get_bus_devfn(np); + + /* Parse ENETC offset */ + switch (bus_devfn) { + case IMX94_ENETC0_BUS_DEVFN: + return NETC_ENETC_ID(0); + case IMX94_ENETC1_BUS_DEVFN: + return NETC_ENETC_ID(1); + case IMX94_ENETC2_BUS_DEVFN: + return NETC_ENETC_ID(2); + default: + return -EINVAL; + } +} + +static int imx94_get_timer_id(struct device_node *np) +{ + int bus_devfn = netc_of_pci_get_bus_devfn(np); + + /* Parse NETC PTP timer ID, the timer0 is on bus 0, + * the timer 1 and timer2 is on bus 1. 
+ */ + switch (bus_devfn) { + case IMX94_TIMER0_BUS_DEVFN: + return NETC_TIMER_ID(0); + case IMX94_TIMER1_BUS_DEVFN: + return NETC_TIMER_ID(1); + case IMX94_TIMER2_BUS_DEVFN: + return NETC_TIMER_ID(2); + default: + return -EINVAL; + } +} + +static int imx94_enetc_update_tid(struct netc_blk_ctrl *priv, + struct device_node *np) +{ + struct device *dev = &priv->pdev->dev; + struct device_node *timer_np; + int eid, tid; + + eid = imx94_get_enetc_id(np); + if (eid < 0) { + dev_err(dev, "Failed to get ENETC ID\n"); + return eid; + } + + timer_np = of_parse_phandle(np, "ptp-timer", 0); + if (!timer_np) { + /* If 'ptp-timer' is not present, the timer1 is the default + * timer of all standalone ENETCs, which is on the same PCIe + * bus as these ENETCs. + */ + tid = NETC_TIMER_ID(1); + goto end; + } + + tid = imx94_get_timer_id(timer_np); + of_node_put(timer_np); + if (tid < 0) { + dev_err(dev, "Failed to get NETC Timer ID\n"); + return tid; + } + +end: + netc_reg_write(priv->ierb, IERB_ETBCR(eid), tid); + + return 0; +} + +static int imx94_ierb_init(struct platform_device *pdev) +{ + struct netc_blk_ctrl *priv = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + int err; + + for_each_child_of_node_scoped(np, child) { + for_each_child_of_node_scoped(child, gchild) { + if (!of_device_is_compatible(gchild, "pci1131,e101")) + continue; + + err = imx94_enetc_update_tid(priv, gchild); + if (err) + return err; + } + } + + return 0; +} + static int netc_ierb_init(struct platform_device *pdev) { struct netc_blk_ctrl *priv = platform_get_drvdata(pdev); @@ -441,6 +540,7 @@ static const struct netc_devinfo imx95_devinfo = { static const struct netc_devinfo imx94_devinfo = { .flags = NETC_HAS_NETCMIX, .netcmix_init = imx94_netcmix_init, + .ierb_init = imx94_ierb_init, }; static const struct of_device_id netc_blk_ctrl_match[] = { From 5175c1e4adcad3af2cf9d0e5f7a7d1142c3ed320 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 29 Oct 2025 09:38:59 +0800 Subject: 
[PATCH 381/867] net: enetc: add basic support for the ENETC with pseudo MAC for i.MX94 The ENETC with pseudo MAC is an internal port which connects to the CPU port of the switch. The switch CPU/host ENETC is fully integrated with the switch and does not require a back-to-back MAC; instead, a lightweight "pseudo MAC" provides the delineation between switch and ENETC. This translates to lower power (less logic and memory) and lower delay (as there is no serialization delay across this link). Different from the standalone ENETC which is used as the external port, the internal ENETC has a different PCIe device ID, and it does not have Ethernet MAC port registers; instead, it has a small number of pseudo MAC port registers, so some features are not supported by the pseudo MAC, such as loopback, half duplex, one-step timestamping and so on. Therefore, the configuration of this internal ENETC is also somewhat different from that of the standalone ENETC. So add the basic support for ENETC with pseudo MAC. More support will be added in the future. 
Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20251029013900.407583-6-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 24 +++++++- drivers/net/ethernet/freescale/enetc/enetc.h | 8 +++ .../net/ethernet/freescale/enetc/enetc4_hw.h | 30 +++++++++ .../net/ethernet/freescale/enetc/enetc4_pf.c | 15 +++++ .../ethernet/freescale/enetc/enetc_ethtool.c | 61 +++++++++++++++++++ .../net/ethernet/freescale/enetc/enetc_hw.h | 1 + .../freescale/enetc/enetc_pf_common.c | 5 +- 7 files changed, 142 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 0535e92404e3c..3ed0e04eb589f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -14,12 +14,21 @@ u32 enetc_port_mac_rd(struct enetc_si *si, u32 reg) { + /* ENETC with pseudo MAC does not have Ethernet MAC + * port registers. + */ + if (enetc_is_pseudo_mac(si)) + return 0; + return enetc_port_rd(&si->hw, reg); } EXPORT_SYMBOL_GPL(enetc_port_mac_rd); void enetc_port_mac_wr(struct enetc_si *si, u32 reg, u32 val) { + if (enetc_is_pseudo_mac(si)) + return; + enetc_port_wr(&si->hw, reg, val); if (si->hw_features & ENETC_SI_F_QBU) enetc_port_wr(&si->hw, reg + si->drvdata->pmac_offset, val); @@ -3367,7 +3376,8 @@ int enetc_hwtstamp_set(struct net_device *ndev, new_offloads |= ENETC_F_TX_TSTAMP; break; case HWTSTAMP_TX_ONESTEP_SYNC: - if (!enetc_si_is_pf(priv->si)) + if (!enetc_si_is_pf(priv->si) || + enetc_is_pseudo_mac(priv->si)) return -EOPNOTSUPP; new_offloads &= ~ENETC_F_TX_TSTAMP_MASK; @@ -3708,6 +3718,13 @@ static const struct enetc_drvdata enetc4_pf_data = { .eth_ops = &enetc4_pf_ethtool_ops, }; +static const struct enetc_drvdata enetc4_ppm_data = { + .sysclk_freq = ENETC_CLK_333M, + .tx_csum = true, + .max_frags = ENETC4_MAX_SKB_FRAGS, + .eth_ops = &enetc4_ppm_ethtool_ops, +}; + static const struct 
enetc_drvdata enetc_vf_data = { .sysclk_freq = ENETC_CLK_400M, .max_frags = ENETC_MAX_SKB_FRAGS, @@ -3727,6 +3744,11 @@ static const struct enetc_platform_info enetc_info[] = { .dev_id = ENETC_DEV_ID_VF, .data = &enetc_vf_data, }, + { + .revision = ENETC_REV_4_3, + .dev_id = NXP_ENETC_PPM_DEV_ID, + .data = &enetc4_ppm_data, + }, }; int enetc_get_driver_data(struct enetc_si *si) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index f279fa597991e..dce27bd67a7d1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -273,6 +273,7 @@ enum enetc_errata { #define ENETC_SI_F_QBV BIT(1) #define ENETC_SI_F_QBU BIT(2) #define ENETC_SI_F_LSO BIT(3) +#define ENETC_SI_F_PPM BIT(4) /* pseudo MAC */ struct enetc_drvdata { u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */ @@ -362,6 +363,11 @@ static inline int enetc_pf_to_port(struct pci_dev *pf_pdev) } } +static inline bool enetc_is_pseudo_mac(struct enetc_si *si) +{ + return si->hw_features & ENETC_SI_F_PPM; +} + #define ENETC_MAX_NUM_TXQS 8 #define ENETC_INT_NAME_MAX (IFNAMSIZ + 8) @@ -534,6 +540,8 @@ int enetc_hwtstamp_set(struct net_device *ndev, extern const struct ethtool_ops enetc_pf_ethtool_ops; extern const struct ethtool_ops enetc4_pf_ethtool_ops; extern const struct ethtool_ops enetc_vf_ethtool_ops; +extern const struct ethtool_ops enetc4_ppm_ethtool_ops; + void enetc_set_ethtool_ops(struct net_device *ndev); void enetc_mm_link_state_update(struct enetc_ndev_priv *priv, bool link); void enetc_mm_commit_preemptible_tcs(struct enetc_ndev_priv *priv); diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 19bf0e89cdc25..ebea4298791ce 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -11,6 +11,7 @@ #define NXP_ENETC_VENDOR_ID 0x1131 #define NXP_ENETC_PF_DEV_ID 
0xe101 +#define NXP_ENETC_PPM_DEV_ID 0xe110 /**********************Station interface registers************************/ /* Station interface LSO segmentation flag mask register 0/1 */ @@ -115,6 +116,10 @@ #define PMCAPR_HD BIT(8) #define PMCAPR_FP GENMASK(10, 9) +/* Port capability register */ +#define ENETC4_PCAPR 0x4000 +#define PCAPR_LINK_TYPE BIT(4) + /* Port configuration register */ #define ENETC4_PCR 0x4010 #define PCR_HDR_FMT BIT(0) @@ -193,4 +198,29 @@ #define SSP_1G 2 #define PM_IF_MODE_ENA BIT(15) +/**********************ENETC Pseudo MAC port registers************************/ +/* Port pseudo MAC receive octets counter (64-bit) */ +#define ENETC4_PPMROCR 0x5080 + +/* Port pseudo MAC receive unicast frame counter register (64-bit) */ +#define ENETC4_PPMRUFCR 0x5088 + +/* Port pseudo MAC receive multicast frame counter register (64-bit) */ +#define ENETC4_PPMRMFCR 0x5090 + +/* Port pseudo MAC receive broadcast frame counter register (64-bit) */ +#define ENETC4_PPMRBFCR 0x5098 + +/* Port pseudo MAC transmit octets counter (64-bit) */ +#define ENETC4_PPMTOCR 0x50c0 + +/* Port pseudo MAC transmit unicast frame counter register (64-bit) */ +#define ENETC4_PPMTUFCR 0x50c8 + +/* Port pseudo MAC transmit multicast frame counter register (64-bit) */ +#define ENETC4_PPMTMFCR 0x50d0 + +/* Port pseudo MAC transmit broadcast frame counter register (64-bit) */ +#define ENETC4_PPMTBFCR 0x50d8 + #endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 82c443b28b154..498346dd996aa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -41,6 +41,16 @@ static void enetc4_get_port_caps(struct enetc_pf *pf) pf->caps.mac_filter_num = val & PSIMAFCAPR_NUM_MAC_AFTE; } +static void enetc4_get_psi_hw_features(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + u32 val; + + val = enetc_port_rd(hw, ENETC4_PCAPR); + if (val & PCAPR_LINK_TYPE) + 
si->hw_features |= ENETC_SI_F_PPM; +} + static void enetc4_pf_set_si_primary_mac(struct enetc_hw *hw, int si, const u8 *addr) { @@ -277,6 +287,7 @@ static int enetc4_pf_struct_init(struct enetc_si *si) pf->ops = &enetc4_pf_ops; enetc4_get_port_caps(pf); + enetc4_get_psi_hw_features(si); return 0; } @@ -589,6 +600,9 @@ static void enetc4_mac_config(struct enetc_pf *pf, unsigned int mode, struct enetc_si *si = pf->si; u32 val; + if (enetc_is_pseudo_mac(si)) + return; + val = enetc_port_mac_rd(si, ENETC4_PM_IF_MODE(0)); val &= ~(PM_IF_MODE_IFMODE | PM_IF_MODE_ENA); @@ -1071,6 +1085,7 @@ static void enetc4_pf_remove(struct pci_dev *pdev) static const struct pci_device_id enetc4_pf_id_table[] = { { PCI_DEVICE(NXP_ENETC_VENDOR_ID, NXP_ENETC_PF_DEV_ID) }, + { PCI_DEVICE(NXP_ENETC_VENDOR_ID, NXP_ENETC_PPM_DEV_ID) }, { 0, } /* End of table. */ }; MODULE_DEVICE_TABLE(pci, enetc4_pf_id_table); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 71d052de669ab..5ef2c5f3ff8f6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -435,6 +435,48 @@ static void enetc_get_eth_mac_stats(struct net_device *ndev, } } +static void enetc_ppm_mac_stats(struct enetc_si *si, + struct ethtool_eth_mac_stats *s) +{ + struct enetc_hw *hw = &si->hw; + u64 rufcr, rmfcr, rbfcr; + u64 tufcr, tmfcr, tbfcr; + + rufcr = enetc_port_rd64(hw, ENETC4_PPMRUFCR); + rmfcr = enetc_port_rd64(hw, ENETC4_PPMRMFCR); + rbfcr = enetc_port_rd64(hw, ENETC4_PPMRBFCR); + + tufcr = enetc_port_rd64(hw, ENETC4_PPMTUFCR); + tmfcr = enetc_port_rd64(hw, ENETC4_PPMTMFCR); + tbfcr = enetc_port_rd64(hw, ENETC4_PPMTBFCR); + + s->FramesTransmittedOK = tufcr + tmfcr + tbfcr; + s->FramesReceivedOK = rufcr + rmfcr + rbfcr; + s->OctetsTransmittedOK = enetc_port_rd64(hw, ENETC4_PPMTOCR); + s->OctetsReceivedOK = enetc_port_rd64(hw, ENETC4_PPMROCR); + s->MulticastFramesXmittedOK = tmfcr; + 
s->BroadcastFramesXmittedOK = tbfcr; + s->MulticastFramesReceivedOK = rmfcr; + s->BroadcastFramesReceivedOK = rbfcr; +} + +static void enetc_ppm_get_eth_mac_stats(struct net_device *ndev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + switch (mac_stats->src) { + case ETHTOOL_MAC_STATS_SRC_EMAC: + enetc_ppm_mac_stats(priv->si, mac_stats); + break; + case ETHTOOL_MAC_STATS_SRC_PMAC: + break; + case ETHTOOL_MAC_STATS_SRC_AGGREGATE: + ethtool_aggregate_mac_stats(ndev, mac_stats); + break; + } +} + static void enetc_get_eth_ctrl_stats(struct net_device *ndev, struct ethtool_eth_ctrl_stats *ctrl_stats) { @@ -1313,6 +1355,25 @@ const struct ethtool_ops enetc_pf_ethtool_ops = { .get_mm_stats = enetc_get_mm_stats, }; +const struct ethtool_ops enetc4_ppm_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, + .get_eth_mac_stats = enetc_ppm_get_eth_mac_stats, + .get_rxnfc = enetc4_get_rxnfc, + .get_rxfh_key_size = enetc_get_rxfh_key_size, + .get_rxfh_indir_size = enetc_get_rxfh_indir_size, + .get_rxfh = enetc_get_rxfh, + .set_rxfh = enetc_set_rxfh, + .get_rxfh_fields = enetc_get_rxfh_fields, + .get_ringparam = enetc_get_ringparam, + .get_coalesce = enetc_get_coalesce, + .set_coalesce = enetc_set_coalesce, + .get_link_ksettings = enetc_get_link_ksettings, + .set_link_ksettings = enetc_set_link_ksettings, + .get_link = ethtool_op_get_link, +}; + const struct ethtool_ops enetc_vf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 377c963258147..7b882b8921fe9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -378,6 +378,7 @@ enum enetc_bdr_type {TX, RX}; #define EIPBRR0_REVISION GENMASK(15, 0) #define 
ENETC_REV_1_0 0x0100 #define ENETC_REV_4_1 0X0401 +#define ENETC_REV_4_3 0x0403 #define ENETC_G_EIPBRR1 0x0bfc #define ENETC_G_EPFBLPR(n) (0xd00 + 4 * (n)) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index edf14a95cab73..9c634205e2a76 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -109,7 +109,7 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | @@ -133,6 +133,9 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->features |= NETIF_F_RXHASH; } + if (!enetc_is_pseudo_mac(si)) + ndev->hw_features |= NETIF_F_LOOPBACK; + /* TODO: currently, i.MX95 ENETC driver does not support advanced features */ if (!is_enetc_rev1(si)) goto end; From 2d673b0e2f8d75b66aa21e5ca5d95c415dec5be7 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 29 Oct 2025 09:39:00 +0800 Subject: [PATCH 382/867] net: enetc: add standalone ENETC support for i.MX94 The revision of i.MX94 ENETC is changed to v4.3, so add this revision to enetc_info to support i.MX94 ENETC. And add PTP support for i.MX94. 
Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20251029013900.407583-7-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++++ drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3ed0e04eb589f..d5e5800b84eff 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -3749,6 +3749,10 @@ static const struct enetc_platform_info enetc_info[] = { .dev_id = NXP_ENETC_PPM_DEV_ID, .data = &enetc4_ppm_data, }, + { .revision = ENETC_REV_4_3, + .dev_id = NXP_ENETC_PF_DEV_ID, + .data = &enetc4_pf_data, + }, }; int enetc_get_driver_data(struct enetc_si *si) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 5ef2c5f3ff8f6..3e222321b937c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -936,6 +936,9 @@ static int enetc_get_phc_index_by_pdev(struct enetc_si *si) case ENETC_REV_4_1: devfn = PCI_DEVFN(24, 0); break; + case ENETC_REV_4_3: + devfn = PCI_DEVFN(0, 1); + break; default: return -1; } From 9f2674e1c3356ec3895d2da404b5c11af965776c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 27 Oct 2025 13:50:19 +0200 Subject: [PATCH 383/867] net: ethernet: Remove redundant pm_runtime_mark_last_busy() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). 
Signed-off-by: Sakari Ailus Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20251027115022.390997-1-sakari.ailus@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 5 ----- drivers/net/ethernet/freescale/fec_main.c | 8 -------- drivers/net/ethernet/renesas/ravb_main.c | 7 +------ drivers/net/ethernet/ti/davinci_mdio.c | 7 ------- 4 files changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b1ed98d9c4380..e461f5072884e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -327,7 +327,6 @@ static int macb_mdio_read_c22(struct mii_bus *bus, int mii_id, int regnum) status = MACB_BFEXT(DATA, macb_readl(bp, MAN)); mdio_read_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -373,7 +372,6 @@ static int macb_mdio_read_c45(struct mii_bus *bus, int mii_id, int devad, status = MACB_BFEXT(DATA, macb_readl(bp, MAN)); mdio_read_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -405,7 +403,6 @@ static int macb_mdio_write_c22(struct mii_bus *bus, int mii_id, int regnum, goto mdio_write_exit; mdio_write_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -451,7 +448,6 @@ static int macb_mdio_write_c45(struct mii_bus *bus, int mii_id, goto mdio_write_exit; mdio_write_exit: - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); mdio_pm_exit: return status; @@ -5622,7 +5618,6 @@ static int macb_probe(struct platform_device *pdev) macb_is_gem(bp) ? 
"GEM" : "MACB", macb_readl(bp, MID), dev->base_addr, dev->irq, dev->dev_addr); - pm_runtime_mark_last_busy(&bp->pdev->dev); pm_runtime_put_autosuspend(&bp->pdev->dev); return 0; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1edcfaee6819e..4875101c4f3f5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2231,7 +2231,6 @@ static int fec_enet_mdio_read_c22(struct mii_bus *bus, int mii_id, int regnum) ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2280,7 +2279,6 @@ static int fec_enet_mdio_read_c45(struct mii_bus *bus, int mii_id, ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2312,7 +2310,6 @@ static int fec_enet_mdio_write_c22(struct mii_bus *bus, int mii_id, int regnum, if (ret) netdev_err(fep->netdev, "MDIO write timeout\n"); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2356,7 +2353,6 @@ static int fec_enet_mdio_write_c45(struct mii_bus *bus, int mii_id, netdev_err(fep->netdev, "MDIO write timeout\n"); out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2839,7 +2835,6 @@ static void fec_enet_get_regs(struct net_device *ndev, buf[off] = readl(&theregs[off]); } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } @@ -3616,7 +3611,6 @@ fec_enet_open(struct net_device *ndev) err_enet_alloc: fec_enet_clk_enable(ndev, false); clk_enable: - pm_runtime_mark_last_busy(&fep->pdev->dev); pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; @@ -3647,7 +3641,6 @@ fec_enet_close(struct net_device *ndev) cpu_latency_qos_remove_request(&fep->pm_qos_req); pinctrl_pm_select_sleep_state(&fep->pdev->dev); - 
pm_runtime_mark_last_busy(&fep->pdev->dev); pm_runtime_put_autosuspend(&fep->pdev->dev); fec_enet_free_buffers(ndev); @@ -4616,7 +4609,6 @@ fec_probe(struct platform_device *pdev) INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); return 0; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index edfaa58505275..cc619dbebf9d8 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1975,7 +1975,6 @@ static int ravb_open(struct net_device *ndev) out_set_reset: ravb_set_opmode(ndev, CCC_OPC_RESET); out_rpm_put: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); out_napi_off: if (info->nc_queues) @@ -2404,7 +2403,6 @@ static int ravb_close(struct net_device *ndev) if (error) return error; - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; @@ -3093,7 +3091,6 @@ static int ravb_probe(struct platform_device *pdev) netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n", (u32)ndev->base_addr, ndev->dev_addr, ndev->irq); - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); return 0; @@ -3277,10 +3274,8 @@ static int ravb_resume(struct device *dev) return 0; out_rpm_put: - if (!priv->wol_enabled) { - pm_runtime_mark_last_busy(dev); + if (!priv->wol_enabled) pm_runtime_put_autosuspend(dev); - } return ret; } diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 68507126be8e4..9f049ebbf1079 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -234,7 +234,6 @@ static int davinci_mdiobb_read_c22(struct mii_bus *bus, int phy, int reg) ret = mdiobb_read_c22(bus, phy, reg); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -251,7 +250,6 @@ static int davinci_mdiobb_write_c22(struct mii_bus *bus, 
int phy, int reg, ret = mdiobb_write_c22(bus, phy, reg, val); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -268,7 +266,6 @@ static int davinci_mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, ret = mdiobb_read_c45(bus, phy, devad, reg); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -285,7 +282,6 @@ static int davinci_mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, ret = mdiobb_write_c45(bus, phy, devad, reg, val); - pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); return ret; @@ -332,7 +328,6 @@ static int davinci_mdio_common_reset(struct davinci_mdio_data *data) data->bus->phy_mask = phy_mask; done: - pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); return 0; @@ -441,7 +436,6 @@ static int davinci_mdio_read(struct mii_bus *bus, int phy_id, int phy_reg) break; } - pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); return ret; } @@ -478,7 +472,6 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id, break; } - pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); return ret; From a5d937dd0ead5ff826f94eb926f26106b6d178fa Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 27 Oct 2025 13:50:20 +0200 Subject: [PATCH 384/867] net: ipa: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). 
Signed-off-by: Sakari Ailus Reviewed-by: Dmitry Baryshkov Link: https://patch.msgid.link/20251027115022.390997-2-sakari.ailus@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_interrupt.c | 1 - drivers/net/ipa/ipa_main.c | 1 - drivers/net/ipa/ipa_modem.c | 4 ---- drivers/net/ipa/ipa_smp2p.c | 2 -- drivers/net/ipa/ipa_uc.c | 2 -- 5 files changed, 10 deletions(-) diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 245a069970556..8336596b12476 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -149,7 +149,6 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id) iowrite32(pending, ipa->reg_virt + reg_offset(reg)); } out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return IRQ_HANDLED; diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 25500c5a6928e..95a61bae3124e 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -903,7 +903,6 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_deconfig; done: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 8fe0d0e1a00fd..9b136f6b8b4ae 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -71,7 +71,6 @@ static int ipa_open(struct net_device *netdev) netif_start_queue(netdev); - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return 0; @@ -102,7 +101,6 @@ static int ipa_stop(struct net_device *netdev) ipa_endpoint_disable_one(priv->rx); ipa_endpoint_disable_one(priv->tx); out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return 0; @@ -175,7 +173,6 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) ret = ipa_endpoint_skb_tx(endpoint, skb); - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); if (ret) { @@ -432,7 
+429,6 @@ static void ipa_modem_crashed(struct ipa *ipa) dev_err(dev, "error %d zeroing modem memory regions\n", ret); out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); } diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index fcaadd111a8a3..420098796eec7 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -171,7 +171,6 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) WARN(ret != 0, "error %d from ipa_setup()\n", ret); out_power_put: - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); return IRQ_HANDLED; @@ -213,7 +212,6 @@ static void ipa_smp2p_power_release(struct ipa *ipa) if (!ipa->smp2p->power_on) return; - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); ipa->smp2p->power_on = false; } diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index 2963db83ab6b0..dc7e92f2a4fbb 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -158,7 +158,6 @@ static void ipa_uc_response_hdlr(struct ipa *ipa) if (ipa->uc_powered) { ipa->uc_loaded = true; ipa_power_retention(ipa, true); - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); ipa->uc_powered = false; } else { @@ -203,7 +202,6 @@ void ipa_uc_deconfig(struct ipa *ipa) if (!ipa->uc_powered) return; - pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); } From 10c7b9be47e683634535c360e75ec2a48e73b2ce Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 27 Oct 2025 13:50:22 +0200 Subject: [PATCH 385/867] net: wwan: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). 
Signed-off-by: Sakari Ailus Link: https://patch.msgid.link/20251027115022.390997-4-sakari.ailus@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/qcom_bam_dmux.c | 2 -- drivers/net/wwan/t7xx/t7xx_hif_cldma.c | 3 --- drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c | 2 -- drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c | 2 -- 4 files changed, 9 deletions(-) diff --git a/drivers/net/wwan/qcom_bam_dmux.c b/drivers/net/wwan/qcom_bam_dmux.c index 64dab8b57611c..6a5b22589af48 100644 --- a/drivers/net/wwan/qcom_bam_dmux.c +++ b/drivers/net/wwan/qcom_bam_dmux.c @@ -162,7 +162,6 @@ static void bam_dmux_tx_done(struct bam_dmux_skb_dma *skb_dma) struct bam_dmux *dmux = skb_dma->dmux; unsigned long flags; - pm_runtime_mark_last_busy(dmux->dev); pm_runtime_put_autosuspend(dmux->dev); if (skb_dma->addr) @@ -397,7 +396,6 @@ static void bam_dmux_tx_wakeup_work(struct work_struct *work) dma_async_issue_pending(dmux->tx); out: - pm_runtime_mark_last_busy(dmux->dev); pm_runtime_put_autosuspend(dmux->dev); } diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index 97163e1e5783e..689c920ca8981 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -250,7 +250,6 @@ static void t7xx_cldma_rx_done(struct work_struct *work) t7xx_cldma_clear_ip_busy(&md_ctrl->hw_info); t7xx_cldma_hw_irq_en_txrx(&md_ctrl->hw_info, queue->index, MTK_RX); t7xx_cldma_hw_irq_en_eq(&md_ctrl->hw_info, queue->index, MTK_RX); - pm_runtime_mark_last_busy(md_ctrl->dev); pm_runtime_put_autosuspend(md_ctrl->dev); } @@ -362,7 +361,6 @@ static void t7xx_cldma_tx_done(struct work_struct *work) } spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); - pm_runtime_mark_last_busy(md_ctrl->dev); pm_runtime_put_autosuspend(md_ctrl->dev); } @@ -987,7 +985,6 @@ int t7xx_cldma_send_skb(struct cldma_ctrl *md_ctrl, int qno, struct sk_buff *skb allow_sleep: t7xx_pci_enable_sleep(md_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(md_ctrl->dev); 
pm_runtime_put_autosuspend(md_ctrl->dev); return ret; } diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c index 2310493203d3c..b76bea6ab2d76 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c @@ -877,7 +877,6 @@ int t7xx_dpmaif_napi_rx_poll(struct napi_struct *napi, const int budget) t7xx_dpmaif_clr_ip_busy_sts(&rxq->dpmaif_ctrl->hw_info); t7xx_dpmaif_dlq_unmask_rx_done(&rxq->dpmaif_ctrl->hw_info, rxq->index); t7xx_pci_enable_sleep(rxq->dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(rxq->dpmaif_ctrl->dev); pm_runtime_put_autosuspend(rxq->dpmaif_ctrl->dev); atomic_set(&rxq->rx_processing, 0); } else { @@ -1078,7 +1077,6 @@ static void t7xx_dpmaif_bat_release_work(struct work_struct *work) } t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(dpmaif_ctrl->dev); pm_runtime_put_autosuspend(dpmaif_ctrl->dev); } diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c index 8dab025a088a2..236d632cf591b 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c @@ -185,7 +185,6 @@ static void t7xx_dpmaif_tx_done(struct work_struct *work) } t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(dpmaif_ctrl->dev); pm_runtime_put_autosuspend(dpmaif_ctrl->dev); } @@ -468,7 +467,6 @@ static int t7xx_dpmaif_tx_hw_push_thread(void *arg) t7xx_pci_disable_sleep(dpmaif_ctrl->t7xx_dev); t7xx_do_tx_hw_push(dpmaif_ctrl); t7xx_pci_enable_sleep(dpmaif_ctrl->t7xx_dev); - pm_runtime_mark_last_busy(dpmaif_ctrl->dev); pm_runtime_put_autosuspend(dpmaif_ctrl->dev); } From cf35f4347ddd42ceca156e57a87ec489788f1bd7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 25 Oct 2025 20:35:47 +0200 Subject: [PATCH 386/867] net: stmmac: mdio: fix incorrect phy address check max_addr is the max number of addresses, not the highest possible address, therefore 
check phydev->mdio.addr > max_addr isn't correct. To fix this change the semantics of max_addr, so that it represents the highest possible address. IMO this is also a little bit more intuitive wrt name max_addr. Fixes: 4a107a0e8361 ("net: stmmac: mdio: use phy_find_first to simplify stmmac_mdio_register") Reported-by: Dan Carpenter Reported-by: Simon Horman Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/e869999b-2d4b-4dc1-9890-c2d3d1e8d0f8@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 3f8cc3293964c..1e82850f2a25c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -583,9 +583,9 @@ int stmmac_mdio_register(struct net_device *ndev) struct device_node *mdio_node = priv->plat->mdio_node; struct device *dev = ndev->dev.parent; struct fwnode_handle *fixed_node; + int max_addr = PHY_MAX_ADDR - 1; struct fwnode_handle *fwnode; struct phy_device *phydev; - int max_addr; if (!mdio_bus_data) return 0; @@ -609,15 +609,12 @@ int stmmac_mdio_register(struct net_device *ndev) if (priv->synopsys_id < DWXGMAC_CORE_2_20) { /* Right now only C22 phys are supported */ - max_addr = MII_XGMAC_MAX_C22ADDR + 1; + max_addr = MII_XGMAC_MAX_C22ADDR; /* Check if DT specified an unsupported phy addr */ if (priv->plat->phy_addr > MII_XGMAC_MAX_C22ADDR) dev_err(dev, "Unsupported phy_addr (max=%d)\n", MII_XGMAC_MAX_C22ADDR); - } else { - /* XGMAC version 2.20 onwards support 32 phy addr */ - max_addr = PHY_MAX_ADDR; } } else { new_bus->read = &stmmac_mdio_read_c22; @@ -626,8 +623,6 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->read_c45 = &stmmac_mdio_read_c45; new_bus->write_c45 = &stmmac_mdio_write_c45; } - - max_addr = PHY_MAX_ADDR; } if 
(mdio_bus_data->needs_reset) From 26888de97b2ffe0267c12dd4e9fcd552545903f1 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 25 Oct 2025 20:49:50 +0200 Subject: [PATCH 387/867] net: phy: add iterator mdiobus_for_each_phy Add an iterator for all PHY's on a MII bus, and phy_find_next() as a prerequisite. Signed-off-by: Heiner Kallweit Reviewed-by: Wei Fang Link: https://patch.msgid.link/cd112f15-401a-43d9-8525-9ff0965a68cd@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 16 +++++++++------- include/linux/phy.h | 11 ++++++++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b7feaf0cb1df1..737747cf19069 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1224,22 +1224,24 @@ int phy_get_c45_ids(struct phy_device *phydev) EXPORT_SYMBOL(phy_get_c45_ids); /** - * phy_find_first - finds the first PHY device on the bus + * phy_find_next - finds the next PHY device on the bus * @bus: the target MII bus + * @pos: cursor + * + * Return: next phy_device on the bus, or NULL */ -struct phy_device *phy_find_first(struct mii_bus *bus) +struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos) { - struct phy_device *phydev; - int addr; + for (int addr = pos ? 
pos->mdio.addr + 1 : 0; + addr < PHY_MAX_ADDR; addr++) { + struct phy_device *phydev = mdiobus_get_phy(bus, addr); - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phydev = mdiobus_get_phy(bus, addr); if (phydev) return phydev; } return NULL; } -EXPORT_SYMBOL(phy_find_first); +EXPORT_SYMBOL_GPL(phy_find_next); /** * phy_prepare_link - prepares the PHY layer to monitor link status diff --git a/include/linux/phy.h b/include/linux/phy.h index 17a2cdc9f1a0f..358dd6f0ff965 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1869,7 +1869,7 @@ int phy_sfp_probe(struct phy_device *phydev, const struct sfp_upstream_ops *ops); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); -struct phy_device *phy_find_first(struct mii_bus *bus); +struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, @@ -1896,6 +1896,15 @@ bool phy_check_valid(int speed, int duplex, unsigned long *features); int phy_restart_aneg(struct phy_device *phydev); int phy_reset_after_clk_enable(struct phy_device *phydev); +static inline struct phy_device *phy_find_first(struct mii_bus *bus) +{ + return phy_find_next(bus, NULL); +} + +#define mdiobus_for_each_phy(_bus, _phydev) \ + for (_phydev = phy_find_first(_bus); _phydev; \ + _phydev = phy_find_next(_bus, _phydev)) + #if IS_ENABLED(CONFIG_PHYLIB) int phy_start_cable_test(struct phy_device *phydev, struct netlink_ext_ack *extack); From 0514010d553aa8e820edaee5227cc4f3ae183ae3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 25 Oct 2025 20:50:41 +0200 Subject: [PATCH 388/867] net: fec: use new iterator mdiobus_for_each_phy Use new iterator mdiobus_for_each_phy() to simplify the code. 
Signed-off-by: Heiner Kallweit Reviewed-by: Wei Fang Link: https://patch.msgid.link/65eb9490-5666-4b4a-8d26-3fca738b1315@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 4875101c4f3f5..024dd443bfbc3 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2548,7 +2548,6 @@ static int fec_enet_mii_init(struct platform_device *pdev) int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; - int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2663,11 +2662,8 @@ static int fec_enet_mii_init(struct platform_device *pdev) of_node_put(node); /* find all the PHY devices on the bus and set mac_managed_pm to true */ - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phydev = mdiobus_get_phy(fep->mii_bus, addr); - if (phydev) - phydev->mac_managed_pm = true; - } + mdiobus_for_each_phy(fep->mii_bus, phydev) + phydev->mac_managed_pm = true; mii_cnt++; From 4575875065dee7f60452b557230e12f45c4bb012 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 25 Oct 2025 20:52:09 +0200 Subject: [PATCH 389/867] net: davinci_mdio: use new iterator mdiobus_for_each_phy Use new iterator mdiobus_for_each_phy() to simplify the code. 
Reviewed-by: Siddharth Vadapalli Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/326d1337-2c22-42e3-a152-046ac5c43095@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/davinci_mdio.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 9f049ebbf1079..48f85a3649b26 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -541,8 +541,8 @@ static int davinci_mdio_probe(struct platform_device *pdev) struct davinci_mdio_data *data; struct resource *res; struct phy_device *phy; - int ret, addr; int autosuspend_delay_ms = -1; + int ret; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -645,14 +645,10 @@ static int davinci_mdio_probe(struct platform_device *pdev) goto bail_out; /* scan and dump the bus */ - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phy = mdiobus_get_phy(data->bus, addr); - if (phy) { - dev_info(dev, "phy[%d]: device %s, driver %s\n", - phy->mdio.addr, phydev_name(phy), - phy->drv ? phy->drv->name : "unknown"); - } - } + mdiobus_for_each_phy(data->bus, phy) + dev_info(dev, "phy[%d]: device %s, driver %s\n", + phy->mdio.addr, phydev_name(phy), + phy->drv ? phy->drv->name : "unknown"); return 0; From d4780abb8cce3692f24ea2f1cf162092075e91e8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 25 Oct 2025 20:52:56 +0200 Subject: [PATCH 390/867] net: phy: use new iterator mdiobus_for_each_phy in mdiobus_prevent_c45_scan Use new iterator mdiobus_for_each_phy() to simplify the code. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/6d792b1e-d23d-4b7e-a94f-89c6617b620f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus_provider.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index a2391d4b7e5c8..4b0637405740e 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -249,20 +249,15 @@ static int mdiobus_scan_bus_c45(struct mii_bus *bus) */ static bool mdiobus_prevent_c45_scan(struct mii_bus *bus) { - int i; + struct phy_device *phydev; - for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev; - u32 oui; - - phydev = mdiobus_get_phy(bus, i); - if (!phydev) - continue; - oui = phydev->phy_id >> 10; + mdiobus_for_each_phy(bus, phydev) { + u32 oui = phydev->phy_id >> 10; if (oui == MICREL_OUI) return true; } + return false; } From 0a119c68d0e6b64f460516ad832a7f171be55594 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 27 Oct 2025 13:50:21 +0200 Subject: [PATCH 391/867] net: wireless: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). 
Signed-off-by: Sakari Ailus Link: https://patch.msgid.link/20251027115022.390997-3-sakari.ailus@linux.intel.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/wil6210/pm.c | 1 - drivers/net/wireless/ti/wl18xx/debugfs.c | 3 -- drivers/net/wireless/ti/wlcore/cmd.c | 1 - drivers/net/wireless/ti/wlcore/debugfs.c | 11 ------- drivers/net/wireless/ti/wlcore/main.c | 36 --------------------- drivers/net/wireless/ti/wlcore/scan.c | 1 - drivers/net/wireless/ti/wlcore/sysfs.c | 1 - drivers/net/wireless/ti/wlcore/testmode.c | 2 -- drivers/net/wireless/ti/wlcore/tx.c | 1 - drivers/net/wireless/ti/wlcore/vendor_cmd.c | 3 -- 10 files changed, 60 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/pm.c b/drivers/net/wireless/ath/wil6210/pm.c index f521af575e9ba..c866cfd144c76 100644 --- a/drivers/net/wireless/ath/wil6210/pm.c +++ b/drivers/net/wireless/ath/wil6210/pm.c @@ -458,6 +458,5 @@ void wil_pm_runtime_put(struct wil6210_priv *wil) { struct device *dev = wil_to_dev(wil); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } diff --git a/drivers/net/wireless/ti/wl18xx/debugfs.c b/drivers/net/wireless/ti/wl18xx/debugfs.c index 80fbf740fe6d7..ac756318e8ea5 100644 --- a/drivers/net/wireless/ti/wl18xx/debugfs.c +++ b/drivers/net/wireless/ti/wl18xx/debugfs.c @@ -272,7 +272,6 @@ static ssize_t radar_detection_write(struct file *file, if (ret < 0) count = ret; - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -312,7 +311,6 @@ static ssize_t dynamic_fw_traces_write(struct file *file, if (ret < 0) count = ret; - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -374,7 +372,6 @@ static ssize_t radar_debug_mode_write(struct file *file, wl->radar_debug_mode, 0); } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); diff --git a/drivers/net/wireless/ti/wlcore/cmd.c 
b/drivers/net/wireless/ti/wlcore/cmd.c index fa3a3f71dd156..9d73ba933a16c 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -213,7 +213,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, } while (!event); out: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); free_vector: kfree(events_vector); diff --git a/drivers/net/wireless/ti/wlcore/debugfs.c b/drivers/net/wireless/ti/wlcore/debugfs.c index eb3d3f0e0b4df..bbfd2725215b4 100644 --- a/drivers/net/wireless/ti/wlcore/debugfs.c +++ b/drivers/net/wireless/ti/wlcore/debugfs.c @@ -63,7 +63,6 @@ void wl1271_debugfs_update_stats(struct wl1271 *wl) wl->stats.fw_stats_update = jiffies; } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -113,7 +112,6 @@ static void chip_op_handler(struct wl1271 *wl, unsigned long value, chip_op = arg; chip_op(wl); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); } @@ -287,7 +285,6 @@ static ssize_t dynamic_ps_timeout_write(struct file *file, wl1271_ps_set_mode(wl, wlvif, STATION_AUTO_PS_MODE); } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -357,7 +354,6 @@ static ssize_t forced_ps_write(struct file *file, wl1271_ps_set_mode(wl, wlvif, ps_mode); } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -830,7 +826,6 @@ static ssize_t rx_streaming_interval_write(struct file *file, wl1271_recalc_rx_streaming(wl, wlvif); } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -886,7 +881,6 @@ static ssize_t rx_streaming_always_write(struct file *file, wl1271_recalc_rx_streaming(wl, wlvif); } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -934,7 +928,6 @@ static ssize_t beacon_filtering_write(struct file *file, ret = wl1271_acx_beacon_filter_opt(wl, wlvif, !!value); } - 
pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -1015,7 +1008,6 @@ static ssize_t sleep_auth_write(struct file *file, goto out_sleep; out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -1090,7 +1082,6 @@ static ssize_t dev_mem_read(struct file *file, goto part_err; part_err: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); skip_read: @@ -1172,7 +1163,6 @@ static ssize_t dev_mem_write(struct file *file, const char __user *user_buf, goto part_err; part_err: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); skip_write: @@ -1247,7 +1237,6 @@ static ssize_t fw_logger_write(struct file *file, ret = wl12xx_cmd_config_fwlog(wl); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 6116a8522d960..12f0167d7380e 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -154,7 +154,6 @@ static void wl1271_rx_streaming_enable_work(struct work_struct *work) jiffies + msecs_to_jiffies(wl->conf.rx_streaming.duration)); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -181,7 +180,6 @@ static void wl1271_rx_streaming_disable_work(struct work_struct *work) goto out_sleep; out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -234,7 +232,6 @@ static void wlcore_rc_update_work(struct work_struct *work) } out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -711,7 +708,6 @@ static int wlcore_irq_locked(struct wl1271 *wl) } err_ret: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -1047,7 +1043,6 @@ static void 
wl1271_recovery_work(struct work_struct *work) } wlcore_op_stop_locked(wl); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); ieee80211_restart_hw(wl->hw); @@ -1943,7 +1938,6 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw) goto out_sleep; out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -2131,7 +2125,6 @@ static void wlcore_channel_switch_work(struct work_struct *work) wl12xx_cmd_stop_channel_switch(wl, wlvif); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -2201,7 +2194,6 @@ static void wlcore_pending_auth_complete_work(struct work_struct *work) /* cancel the ROC if active */ wlcore_update_inconn_sta(wl, wlvif, NULL, false); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -2694,7 +2686,6 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw, else wl->sta_count++; out: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out_unlock: mutex_unlock(&wl->mutex); @@ -2774,7 +2765,6 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl, } } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); } deinit: @@ -3200,7 +3190,6 @@ static int wl1271_op_config(struct ieee80211_hw *hw, int radio_idx, u32 changed) } out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -3315,7 +3304,6 @@ static void wl1271_op_configure_filter(struct ieee80211_hw *hw, */ out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -3531,7 +3519,6 @@ static int wlcore_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, ret = wlcore_hw_set_key(wl, cmd, vif, sta, key_conf); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out_wake_queues: @@ -3695,7 +3682,6 @@ static void wl1271_op_set_default_key_idx(struct ieee80211_hw *hw, } 
out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out_unlock: @@ -3724,7 +3710,6 @@ void wlcore_regdomain_config(struct wl1271 *wl) goto out; } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -3772,7 +3757,6 @@ static int wl1271_op_hw_scan(struct ieee80211_hw *hw, ret = wlcore_scan(hw->priv, vif, ssid, len, req); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -3823,7 +3807,6 @@ static void wl1271_op_cancel_hw_scan(struct ieee80211_hw *hw, ieee80211_scan_completed(wl->hw, &info); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -3860,7 +3843,6 @@ static int wl1271_op_sched_scan_start(struct ieee80211_hw *hw, wl->sched_vif = wlvif; out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -3887,7 +3869,6 @@ static int wl1271_op_sched_scan_stop(struct ieee80211_hw *hw, wl->ops->sched_scan_stop(wl, wlvif); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -3916,7 +3897,6 @@ static int wl1271_op_set_frag_threshold(struct ieee80211_hw *hw, if (ret < 0) wl1271_warning("wl1271_op_set_frag_threshold failed: %d", ret); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -3948,7 +3928,6 @@ static int wl1271_op_set_rts_threshold(struct ieee80211_hw *hw, int radio_idx, if (ret < 0) wl1271_warning("set rts threshold failed: %d", ret); } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -4714,7 +4693,6 @@ static void wl1271_op_bss_info_changed(struct ieee80211_hw *hw, else wl1271_bss_info_changed_sta(wl, vif, bss_conf, changed); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -4779,7 +4757,6 @@ static void 
wlcore_op_change_chanctx(struct ieee80211_hw *hw, } } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -4828,7 +4805,6 @@ static int wlcore_op_assign_vif_chanctx(struct ieee80211_hw *hw, wlvif->radar_enabled = true; } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -4871,7 +4847,6 @@ static void wlcore_op_unassign_vif_chanctx(struct ieee80211_hw *hw, wlvif->radar_enabled = false; } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -4941,7 +4916,6 @@ wlcore_op_switch_vif_chanctx(struct ieee80211_hw *hw, goto out_sleep; } out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -4995,7 +4969,6 @@ static int wl1271_op_conf_tx(struct ieee80211_hw *hw, 0, 0); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -5029,7 +5002,6 @@ static u64 wl1271_op_get_tsf(struct ieee80211_hw *hw, goto out_sleep; out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -5342,7 +5314,6 @@ static int wl12xx_op_sta_state(struct ieee80211_hw *hw, ret = wl12xx_update_sta_state(wl, wlvif, sta, old_state, new_state); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -5467,7 +5438,6 @@ static int wl1271_op_ampdu_action(struct ieee80211_hw *hw, ret = -EINVAL; } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -5511,7 +5481,6 @@ static int wl12xx_set_bitrate_mask(struct ieee80211_hw *hw, wl1271_tx_min_rate_get(wl, wlvif->basic_rate_set); ret = wl1271_acx_sta_rate_policies(wl, wlvif); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); } out: @@ -5566,7 +5535,6 @@ static void wl12xx_op_channel_switch(struct ieee80211_hw *hw, } out_sleep: - 
pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: @@ -5645,7 +5613,6 @@ static void wlcore_op_channel_switch_beacon(struct ieee80211_hw *hw, set_bit(WLVIF_FLAG_CS_PROGRESS, &wlvif->flags); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -5699,7 +5666,6 @@ static int wlcore_op_remain_on_channel(struct ieee80211_hw *hw, ieee80211_queue_delayed_work(hw, &wl->roc_complete_work, msecs_to_jiffies(duration)); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -5748,7 +5714,6 @@ static int wlcore_roc_completed(struct wl1271 *wl) ret = __wlcore_roc_completed(wl); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -5839,7 +5804,6 @@ static void wlcore_op_sta_statistics(struct ieee80211_hw *hw, sinfo->signal = rssi_dbm; out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: diff --git a/drivers/net/wireless/ti/wlcore/scan.c b/drivers/net/wireless/ti/wlcore/scan.c index b414305acc322..f6dc54c1dbade 100644 --- a/drivers/net/wireless/ti/wlcore/scan.c +++ b/drivers/net/wireless/ti/wlcore/scan.c @@ -69,7 +69,6 @@ void wl1271_scan_complete_work(struct work_struct *work) wlcore_cmd_regdomain_config_locked(wl); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); ieee80211_scan_completed(wl->hw, &info); diff --git a/drivers/net/wireless/ti/wlcore/sysfs.c b/drivers/net/wireless/ti/wlcore/sysfs.c index 65ca5dc569a04..5ab6c16836756 100644 --- a/drivers/net/wireless/ti/wlcore/sysfs.c +++ b/drivers/net/wireless/ti/wlcore/sysfs.c @@ -58,7 +58,6 @@ static ssize_t bt_coex_state_store(struct device *dev, goto out; wl1271_acx_sg_enable(wl, wl->sg_enabled); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: diff --git a/drivers/net/wireless/ti/wlcore/testmode.c 
b/drivers/net/wireless/ti/wlcore/testmode.c index fc8ea58bc1659..7c0cb1b7fef0b 100644 --- a/drivers/net/wireless/ti/wlcore/testmode.c +++ b/drivers/net/wireless/ti/wlcore/testmode.c @@ -127,7 +127,6 @@ static int wl1271_tm_cmd_test(struct wl1271 *wl, struct nlattr *tb[]) } out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -192,7 +191,6 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[]) out_free: kfree(cmd); out_sleep: - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index 464587d16ab20..f76087be2f758 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c @@ -863,7 +863,6 @@ void wl1271_tx_work(struct work_struct *work) goto out; } - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); diff --git a/drivers/net/wireless/ti/wlcore/vendor_cmd.c b/drivers/net/wireless/ti/wlcore/vendor_cmd.c index e4269e2b00981..5bb9eb300f979 100644 --- a/drivers/net/wireless/ti/wlcore/vendor_cmd.c +++ b/drivers/net/wireless/ti/wlcore/vendor_cmd.c @@ -60,7 +60,6 @@ wlcore_vendor_cmd_smart_config_start(struct wiphy *wiphy, ret = wlcore_smart_config_start(wl, nla_get_u32(tb[WLCORE_VENDOR_ATTR_GROUP_ID])); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -92,7 +91,6 @@ wlcore_vendor_cmd_smart_config_stop(struct wiphy *wiphy, ret = wlcore_smart_config_stop(wl); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); out: mutex_unlock(&wl->mutex); @@ -140,7 +138,6 @@ wlcore_vendor_cmd_smart_config_set_group_key(struct wiphy *wiphy, nla_len(tb[WLCORE_VENDOR_ATTR_GROUP_KEY]), nla_data(tb[WLCORE_VENDOR_ATTR_GROUP_KEY])); - pm_runtime_mark_last_busy(wl->dev); pm_runtime_put_autosuspend(wl->dev); 
out: mutex_unlock(&wl->mutex); From db82ddeaf42b93799a52df347284062893ea2ad6 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 27 Oct 2025 14:22:14 +0200 Subject: [PATCH 392/867] wifi: mac80211: add RX flag to report radiotap VHT information mac80211 already reports some basic information in the radiotap header with the known fields declared by the driver. However, drivers may want to report more accurate information and in that case the full VHT radiotap structure needs to be provided. Add a new RX_FLAG_RADIOTAP_VHT which is set when the VHT information should be pulled from the skb. Update the code to fill in the VHT fields to only do so when requested by the driver or if the information has not yet been set. This way the driver can fully control the information if it chooses so. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251027142118.0bad1c307a21.I2cf285c20a822698039603f2af00ed9c548f2ee0@changeid Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 20 +++++- include/net/mac80211.h | 2 + net/mac80211/rx.c | 104 ++++++++++++++++++++----------- 3 files changed, 89 insertions(+), 37 deletions(-) diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 813e163ce27cc..c60867e7e43c9 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018-2019, 2021-2022 Intel Corporation + * Copyright (c) 2018-2019, 2021-2022, 2025 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -202,6 +202,24 @@ enum ieee80211_radiotap_vht_coding { IEEE80211_RADIOTAP_CODING_LDPC_USER3 = 0x08, }; +enum ieee80211_radiotap_vht_bandwidth { + /* Note: more values are defined but can't really be used */ + IEEE80211_RADIOTAP_VHT_BW_20 = 0, + 
IEEE80211_RADIOTAP_VHT_BW_40 = 1, + IEEE80211_RADIOTAP_VHT_BW_80 = 4, + IEEE80211_RADIOTAP_VHT_BW_160 = 11, +}; + +struct ieee80211_radiotap_vht { + __le16 known; + u8 flags; + u8 bandwidth; + u8 mcs_nss[4]; + u8 coding; + u8 group_id; + __le16 partial_aid; +} __packed; + /* for IEEE80211_RADIOTAP_TIMESTAMP */ enum ieee80211_radiotap_timestamp_unit_spos { IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK = 0x000F, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a55085cf4ec49..c326243e1f014 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1529,6 +1529,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * known the frame shouldn't be reported. * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by * hardware or driver) + * @RX_FLAG_RADIOTAP_VHT: VHT radiotap data is present */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1564,6 +1565,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_LSIG = BIT(28), RX_FLAG_NO_PSDU = BIT(29), RX_FLAG_8023 = BIT(30), + RX_FLAG_RADIOTAP_VHT = BIT(31), }; /** diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4641a2a80856a..b59aeed340b31 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -59,7 +59,8 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, status->flag &= ~(RX_FLAG_RADIOTAP_TLV_AT_END | RX_FLAG_RADIOTAP_LSIG | RX_FLAG_RADIOTAP_HE_MU | - RX_FLAG_RADIOTAP_HE); + RX_FLAG_RADIOTAP_HE | + RX_FLAG_RADIOTAP_VHT); hdr = (void *)skb->data; fc = hdr->frame_control; @@ -151,8 +152,10 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, } if (status->encoding == RX_ENC_VHT) { + /* Included even if RX_FLAG_RADIOTAP_VHT is not set */ len = ALIGN(len, 2); len += 12; + BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_vht) != 12); } if (local->hw.radiotap_timestamp.units_pos >= 0) { @@ -195,6 +198,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, * The position to look at depends on the existence (or non- * existence) of 
other elements, so take that into account... */ + if (status->flag & RX_FLAG_RADIOTAP_VHT) + tlv_offset += + sizeof(struct ieee80211_radiotap_vht); if (status->flag & RX_FLAG_RADIOTAP_HE) tlv_offset += sizeof(struct ieee80211_radiotap_he); @@ -319,10 +325,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u32 tlvs_len = 0; int mpdulen, chain; unsigned long chains = status->chains; + struct ieee80211_radiotap_vht vht = {}; struct ieee80211_radiotap_he he = {}; struct ieee80211_radiotap_he_mu he_mu = {}; struct ieee80211_radiotap_lsig lsig = {}; + if (status->flag & RX_FLAG_RADIOTAP_VHT) { + vht = *(struct ieee80211_radiotap_vht *)skb->data; + skb_pull(skb, sizeof(vht)); + WARN_ON_ONCE(status->encoding != RX_ENC_VHT); + } + if (status->flag & RX_FLAG_RADIOTAP_HE) { he = *(struct ieee80211_radiotap_he *)skb->data; skb_pull(skb, sizeof(he)); @@ -530,45 +543,61 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, } if (status->encoding == RX_ENC_VHT) { - u16 known = local->hw.radiotap_vht_details; + u16 fill = local->hw.radiotap_vht_details; - rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT)); - put_unaligned_le16(known, pos); - pos += 2; - /* flags */ - if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) - *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; + /* Leave driver filled fields alone */ + fill &= ~le16_to_cpu(vht.known); + vht.known |= cpu_to_le16(fill); + + if (fill & IEEE80211_RADIOTAP_VHT_KNOWN_GI && + status->enc_flags & RX_ENC_FLAG_SHORT_GI) + vht.flags |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; /* in VHT, STBC is binary */ - if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) - *pos |= IEEE80211_RADIOTAP_VHT_FLAG_STBC; - if (status->enc_flags & RX_ENC_FLAG_BF) + if (fill & IEEE80211_RADIOTAP_VHT_KNOWN_STBC && + status->enc_flags & RX_ENC_FLAG_STBC_MASK) + vht.flags |= IEEE80211_RADIOTAP_VHT_FLAG_STBC; + if (fill & IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED && + status->enc_flags & RX_ENC_FLAG_BF) *pos |= 
IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED; - pos++; - /* bandwidth */ - switch (status->bw) { - case RATE_INFO_BW_80: - *pos++ = 4; - break; - case RATE_INFO_BW_160: - *pos++ = 11; - break; - case RATE_INFO_BW_40: - *pos++ = 1; - break; - default: - *pos++ = 0; + + if (fill & IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH) { + switch (status->bw) { + case RATE_INFO_BW_40: + vht.bandwidth = IEEE80211_RADIOTAP_VHT_BW_40; + break; + case RATE_INFO_BW_80: + vht.bandwidth = IEEE80211_RADIOTAP_VHT_BW_80; + break; + case RATE_INFO_BW_160: + vht.bandwidth = IEEE80211_RADIOTAP_VHT_BW_160; + break; + default: + vht.bandwidth = IEEE80211_RADIOTAP_VHT_BW_20; + break; + } } - /* MCS/NSS */ - *pos = (status->rate_idx << 4) | status->nss; - pos += 4; - /* coding field */ - if (status->enc_flags & RX_ENC_FLAG_LDPC) - *pos |= IEEE80211_RADIOTAP_CODING_LDPC_USER0; - pos++; - /* group ID */ - pos++; - /* partial_aid */ - pos += 2; + + /* + * If the driver filled in mcs_nss[0], then do not touch it. + * + * Otherwise, put some information about MCS/NSS into the + * user 0 field. Note that this is not technically correct for + * an MU frame as we might have decoded a different user. 
+ */ + if (!vht.mcs_nss[0]) { + vht.mcs_nss[0] = (status->rate_idx << 4) | status->nss; + + /* coding field */ + if (status->enc_flags & RX_ENC_FLAG_LDPC) + vht.coding |= IEEE80211_RADIOTAP_CODING_LDPC_USER0; + } + + /* ensure 2 byte alignment */ + while ((pos - (u8 *)rthdr) & 1) + pos++; + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT)); + memcpy(pos, &vht, sizeof(vht)); + pos += sizeof(vht); } if (local->hw.radiotap_timestamp.units_pos >= 0) { @@ -834,6 +863,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, return NULL; } + if (status->flag & RX_FLAG_RADIOTAP_VHT) + rtap_space += sizeof(struct ieee80211_radiotap_vht); + if (status->flag & RX_FLAG_RADIOTAP_HE) rtap_space += sizeof(struct ieee80211_radiotap_he); From ec81b33b23bed73d7f407dc2079867a3f271aba0 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 27 Oct 2025 11:06:38 -0700 Subject: [PATCH 393/867] wifi: rt2x00: add nvmem eeprom support Some embedded platforms have eeproms located in flash. Add nvmem support to handle this. Support is added for PCI and SOC backends. 
Signed-off-by: Rosen Penev Link: https://patch.msgid.link/20251027180639.3797-1-rosenp@gmail.com Signed-off-by: Johannes Berg --- .../net/wireless/ralink/rt2x00/rt2800lib.c | 31 +++++++++++++++++++ .../net/wireless/ralink/rt2x00/rt2800lib.h | 2 ++ .../net/wireless/ralink/rt2x00/rt2800pci.c | 3 ++ .../net/wireless/ralink/rt2x00/rt2800soc.c | 6 +++- 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index f07152fa37255..65d0f805459ce 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "rt2x00.h" @@ -10962,6 +10963,36 @@ int rt2800_read_eeprom_efuse(struct rt2x00_dev *rt2x00dev) } EXPORT_SYMBOL_GPL(rt2800_read_eeprom_efuse); +int rt2800_read_eeprom_nvmem(struct rt2x00_dev *rt2x00dev) +{ + struct device_node *np = rt2x00dev->dev->of_node; + unsigned int len = rt2x00dev->ops->eeprom_size; + struct nvmem_cell *cell; + const void *data; + size_t retlen; + + cell = of_nvmem_cell_get(np, "eeprom"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + data = nvmem_cell_read(cell, &retlen); + nvmem_cell_put(cell); + + if (IS_ERR(data)) + return PTR_ERR(data); + + if (retlen != len) { + dev_err(rt2x00dev->dev, "invalid eeprom size, required: 0x%04x\n", len); + kfree(data); + return -EINVAL; + } + + memcpy(rt2x00dev->eeprom, data, len); + kfree(data); + return 0; +} +EXPORT_SYMBOL_GPL(rt2800_read_eeprom_nvmem); + static u8 rt2800_get_txmixer_gain_24g(struct rt2x00_dev *rt2x00dev) { u16 word; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.h b/drivers/net/wireless/ralink/rt2x00/rt2800lib.h index 620a3d9872cec..a3c3a751f57e3 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.h @@ -248,6 +248,8 @@ void rt2800_disable_radio(struct rt2x00_dev *rt2x00dev); int 
rt2800_efuse_detect(struct rt2x00_dev *rt2x00dev); int rt2800_read_eeprom_efuse(struct rt2x00_dev *rt2x00dev); +int rt2800_read_eeprom_nvmem(struct rt2x00_dev *rt2x00dev); + int rt2800_probe_hw(struct rt2x00_dev *rt2x00dev); void rt2800_get_key_seq(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c index 14c45aba836f2..4fa14bb573add 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c @@ -278,6 +278,9 @@ static int rt2800pci_read_eeprom(struct rt2x00_dev *rt2x00dev) { int retval; + if (!rt2800_read_eeprom_nvmem(rt2x00dev)) + return 0; + if (rt2800pci_efuse_detect(rt2x00dev)) retval = rt2800pci_read_eeprom_efuse(rt2x00dev); else diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c index 8f510a84e7f16..5c29201b34c8b 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c @@ -92,8 +92,12 @@ static int rt2800soc_set_device_state(struct rt2x00_dev *rt2x00dev, static int rt2800soc_read_eeprom(struct rt2x00_dev *rt2x00dev) { - void __iomem *base_addr = ioremap(0x1F040000, EEPROM_SIZE); + void __iomem *base_addr; + if (!rt2800_read_eeprom_nvmem(rt2x00dev)) + return 0; + + base_addr = ioremap(0x1F040000, EEPROM_SIZE); if (!base_addr) return -ENOMEM; From 508dfc1f2ccdc480893332aaeb5e8d076769f7c3 Mon Sep 17 00:00:00 2001 From: Thomas Wu Date: Tue, 28 Oct 2025 10:04:42 +0530 Subject: [PATCH 394/867] wifi: mac80211: Allow HT Action frame processing on 6 GHz when HE is supported Management frames on 6 GHz do not include HT Capabilities, causing HT Action frames to be dropped in ieee80211_rx_h_action(). The current logic checks only ht_cap.ht_supported, which fails for 6 GHz radios that support only HE and EHT. Update the condition to also allow HT Action frame processing when he_cap.has_he is true. 
This enables support for HE dynamic SM power save as defined in IEEE Std 802.11ax-2021, section 26.14.4. Signed-off-by: Thomas Wu Signed-off-by: Aaradhana Sahu Link: https://patch.msgid.link/20251028043442.523647-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b59aeed340b31..80067ed1da2f9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3602,8 +3602,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: - /* reject HT action frames from stations not supporting HT */ - if (!rx->link_sta->pub->ht_cap.ht_supported) + /* reject HT action frames from stations not supporting HT + * or not HE Capable + */ + if (!rx->link_sta->pub->ht_cap.ht_supported && + !rx->link_sta->pub->he_cap.has_he) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION && From 70e8335485966d7d4ed85976dceab52803b151a2 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 23:13:39 +0530 Subject: [PATCH 395/867] wifi: zd1211rw: fix potential memory leak in __zd_usb_enable_rx() The memory allocated for urbs with kcalloc() is not freed on any error path. Fix that by freeing it in the error path. 
Fixes: e85d0918b54f ("[PATCH] ZyDAS ZD1211 USB-WLAN driver") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251028174341.139134-1-nihaal@cse.iitm.ac.in Signed-off-by: Johannes Berg --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 2faa0de2a36ec..8ee15a15f4ca2 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -791,6 +791,7 @@ static int __zd_usb_enable_rx(struct zd_usb *usb) if (urbs) { for (i = 0; i < RX_URBS_COUNT; i++) free_rx_urb(urbs[i]); + kfree(urbs); } return r; } From 85708c5d5f5bd2a1e63a1fcfab6a1030cc195bfc Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:37 +0530 Subject: [PATCH 396/867] octeontx2-af: Simplify context writing and reading to hardware Simplify NIX context reading and writing by using hardware maximum context size instead of using individual sizes of each context type. 
Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-2-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 44 +++++++++---------- .../marvell/octeontx2/af/rvu_struct.h | 25 ++++++++++- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 828316211b245..26dc0fbeafa67 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1149,36 +1149,36 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, case NIX_AQ_INSTOP_WRITE: if (req->ctype == NIX_AQ_CTYPE_RQ) memcpy(mask, &req->rq_mask, - sizeof(struct nix_rq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_SQ) memcpy(mask, &req->sq_mask, - sizeof(struct nix_sq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_CQ) memcpy(mask, &req->cq_mask, - sizeof(struct nix_cq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_RSS) memcpy(mask, &req->rss_mask, - sizeof(struct nix_rsse_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_MCE) memcpy(mask, &req->mce_mask, - sizeof(struct nix_rx_mce_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_BANDPROF) memcpy(mask, &req->prof_mask, - sizeof(struct nix_bandprof_s)); + NIX_MAX_CTX_SIZE); fallthrough; case NIX_AQ_INSTOP_INIT: if (req->ctype == NIX_AQ_CTYPE_RQ) - memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s)); + memcpy(ctx, &req->rq, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_SQ) - memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s)); + memcpy(ctx, &req->sq, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_CQ) - memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s)); + memcpy(ctx, &req->cq, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_RSS) - memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s)); + 
memcpy(ctx, &req->rss, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_MCE) - memcpy(ctx, &req->mce, sizeof(struct nix_rx_mce_s)); + memcpy(ctx, &req->mce, NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_BANDPROF) - memcpy(ctx, &req->prof, sizeof(struct nix_bandprof_s)); + memcpy(ctx, &req->prof, NIX_MAX_CTX_SIZE); break; case NIX_AQ_INSTOP_NOP: case NIX_AQ_INSTOP_READ: @@ -1243,22 +1243,22 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, if (req->op == NIX_AQ_INSTOP_READ) { if (req->ctype == NIX_AQ_CTYPE_RQ) memcpy(&rsp->rq, ctx, - sizeof(struct nix_rq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_SQ) memcpy(&rsp->sq, ctx, - sizeof(struct nix_sq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_CQ) memcpy(&rsp->cq, ctx, - sizeof(struct nix_cq_ctx_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_RSS) memcpy(&rsp->rss, ctx, - sizeof(struct nix_rsse_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_MCE) memcpy(&rsp->mce, ctx, - sizeof(struct nix_rx_mce_s)); + NIX_MAX_CTX_SIZE); else if (req->ctype == NIX_AQ_CTYPE_BANDPROF) memcpy(&rsp->prof, ctx, - sizeof(struct nix_bandprof_s)); + NIX_MAX_CTX_SIZE); } } @@ -1289,8 +1289,8 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, /* Make copy of original context & mask which are required * for resubmission */ - memcpy(&aq_req.cq_mask, &req->cq_mask, sizeof(struct nix_cq_ctx_s)); - memcpy(&aq_req.cq, &req->cq, sizeof(struct nix_cq_ctx_s)); + memcpy(&aq_req.cq_mask, &req->cq_mask, NIX_MAX_CTX_SIZE); + memcpy(&aq_req.cq, &req->cq, NIX_MAX_CTX_SIZE); /* exclude fields which HW can update */ aq_req.cq_mask.cq_err = 0; @@ -1309,7 +1309,7 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, * updated fields are masked out for request and response * comparison */ - for (word = 0; word < sizeof(struct nix_cq_ctx_s) / sizeof(u64); + for (word = 0; word < NIX_MAX_CTX_SIZE / 
sizeof(u64); word++) { *(u64 *)((u8 *)&aq_rsp.cq + word * 8) &= (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8)); @@ -1317,7 +1317,7 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8)); } - if (memcmp(&aq_req.cq, &aq_rsp.cq, sizeof(struct nix_cq_ctx_s))) + if (memcmp(&aq_req.cq, &aq_rsp.cq, NIX_MAX_CTX_SIZE)) return NIX_AF_ERR_AQ_CTX_RETRY_WRITE; return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 0596a3ac4c12b..8d41cb8f85ef5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -13,6 +13,8 @@ #define RVU_MULTI_BLK_VER 0x7ULL +#define NIX_MAX_CTX_SIZE 128 + /* RVU Block Address Enumeration */ enum rvu_block_addr_e { BLKADDR_RVUM = 0x0ULL, @@ -370,8 +372,12 @@ struct nix_cq_ctx_s { u64 qsize : 4; u64 cq_err_int : 8; u64 cq_err_int_ena : 8; + /* Ensure all context sizes are 128 bytes */ + u64 padding[12]; }; +static_assert(sizeof(struct nix_cq_ctx_s) == NIX_MAX_CTX_SIZE); + /* CN10K NIX Receive queue context structure */ struct nix_cn10k_rq_ctx_s { u64 ena : 1; @@ -460,6 +466,8 @@ struct nix_cn10k_rq_ctx_s { u64 rsvd_1023_960; /* W15 */ }; +static_assert(sizeof(struct nix_cn10k_rq_ctx_s) == NIX_MAX_CTX_SIZE); + /* CN10K NIX Send queue context structure */ struct nix_cn10k_sq_ctx_s { u64 ena : 1; @@ -523,6 +531,8 @@ struct nix_cn10k_sq_ctx_s { u64 rsvd_1023_1008 : 16; }; +static_assert(sizeof(struct nix_cn10k_sq_ctx_s) == NIX_MAX_CTX_SIZE); + /* NIX Receive queue context structure */ struct nix_rq_ctx_s { u64 ena : 1; @@ -594,6 +604,8 @@ struct nix_rq_ctx_s { u64 rsvd_1023_960; /* W15 */ }; +static_assert(sizeof(struct nix_rq_ctx_s) == NIX_MAX_CTX_SIZE); + /* NIX sqe sizes */ enum nix_maxsqesz { NIX_MAXSQESZ_W16 = 0x0, @@ -668,13 +680,18 @@ struct nix_sq_ctx_s { u64 rsvd_1023_1008 : 16; }; +static_assert(sizeof(struct nix_sq_ctx_s) == 
NIX_MAX_CTX_SIZE); + /* NIX Receive side scaling entry structure*/ struct nix_rsse_s { uint32_t rq : 20; uint32_t reserved_20_31 : 12; - + /* Ensure all context sizes are minimum 128 bytes */ + u64 padding[15]; }; +static_assert(sizeof(struct nix_rsse_s) == NIX_MAX_CTX_SIZE); + /* NIX receive multicast/mirror entry structure */ struct nix_rx_mce_s { uint64_t op : 2; @@ -684,8 +701,12 @@ struct nix_rx_mce_s { uint64_t rsvd_31_24 : 8; uint64_t pf_func : 16; uint64_t next : 16; + /* Ensure all context sizes are minimum 128 bytes */ + u64 padding[15]; }; +static_assert(sizeof(struct nix_rx_mce_s) == NIX_MAX_CTX_SIZE); + enum nix_band_prof_layers { BAND_PROF_LEAF_LAYER = 0, BAND_PROF_INVAL_LAYER = 1, @@ -769,6 +790,8 @@ struct nix_bandprof_s { uint64_t reserved_1008_1023 : 16; }; +static_assert(sizeof(struct nix_bandprof_s) == NIX_MAX_CTX_SIZE); + enum nix_lsoalg { NIX_LSOALG_NOP, NIX_LSOALG_ADD_SEGNUM, From b5dcdde074d5560bc513007e2104f5e0a33e1726 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:38 +0530 Subject: [PATCH 397/867] octeontx2-af: Add cn20k NIX block contexts New CN20K silicon has NIX hardware context structures different from previous silicons. Add NIX send and completion queue context definitions for cn20k. Extend NIX context handling support to cn20k. 
Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-3-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/Makefile | 2 +- .../ethernet/marvell/octeontx2/af/cn20k/nix.c | 20 ++ .../marvell/octeontx2/af/cn20k/struct.h | 205 ++++++++++++++++++ .../net/ethernet/marvell/octeontx2/af/mbox.h | 38 ++++ .../net/ethernet/marvell/octeontx2/af/rvu.h | 15 +- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 10 +- 6 files changed, 286 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 532813d8d0281..cb77b978eda58 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -12,4 +12,4 @@ rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \ rvu_sdp.o rvu_npc_hash.o mcs.o mcs_rvu_if.o mcs_cnf10kb.o \ - rvu_rep.o cn20k/mbox_init.o + rvu_rep.o cn20k/mbox_init.o cn20k/nix.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c new file mode 100644 index 0000000000000..aa2016fd1bba9 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/nix.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2024 Marvell. 
+ * + */ + +#include +#include + +#include "struct.h" +#include "../rvu.h" + +int rvu_mbox_handler_nix_cn20k_aq_enq(struct rvu *rvu, + struct nix_cn20k_aq_enq_req *req, + struct nix_cn20k_aq_enq_rsp *rsp) +{ + return rvu_nix_aq_enq_inst(rvu, (struct nix_aq_enq_req *)req, + (struct nix_aq_enq_rsp *)rsp); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h index 76ce3ec6da9cd..ff8f0d1809c8c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h @@ -8,6 +8,8 @@ #ifndef STRUCT_H #define STRUCT_H +#define NIX_MAX_CTX_SIZE 128 + /* * CN20k RVU PF MBOX Interrupt Vector Enumeration * @@ -37,4 +39,207 @@ enum rvu_af_cn20k_int_vec_e { RVU_AF_CN20K_INT_VEC_PFAF1_MBOX1 = 0x9, RVU_AF_CN20K_INT_VEC_CNT = 0xa, }; + +struct nix_cn20k_sq_ctx_s { + u64 ena : 1; /* W0 */ + u64 qint_idx : 6; + u64 substream : 20; + u64 sdp_mcast : 1; + u64 cq : 20; + u64 sqe_way_mask : 16; + u64 smq : 11; /* W1 */ + u64 cq_ena : 1; + u64 xoff : 1; + u64 sso_ena : 1; + u64 smq_rr_weight : 14; + u64 default_chan : 12; + u64 sqb_count : 16; + u64 reserved_120_120 : 1; + u64 smq_rr_count_lb : 7; + u64 smq_rr_count_ub : 25; /* W2 */ + u64 sqb_aura : 20; + u64 sq_int : 8; + u64 sq_int_ena : 8; + u64 sqe_stype : 2; + u64 reserved_191_191 : 1; + u64 max_sqe_size : 2; /* W3 */ + u64 cq_limit : 8; + u64 lmt_dis : 1; + u64 mnq_dis : 1; + u64 smq_next_sq : 20; + u64 smq_lso_segnum : 8; + u64 tail_offset : 6; + u64 smenq_offset : 6; + u64 head_offset : 6; + u64 smenq_next_sqb_vld : 1; + u64 smq_pend : 1; + u64 smq_next_sq_vld : 1; + u64 reserved_253_255 : 3; + u64 next_sqb : 64; /* W4 */ + u64 tail_sqb : 64; /* W5 */ + u64 smenq_sqb : 64; /* W6 */ + u64 smenq_next_sqb : 64; /* W7 */ + u64 head_sqb : 64; /* W8 */ + u64 reserved_576_583 : 8; /* W9 */ + u64 vfi_lso_total : 18; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_sb : 8; + u64 vfi_lso_mps : 14; + u64 
vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vld : 1; + u64 reserved_630_639 : 10; + u64 scm_lso_rem : 18; /* W10 */ + u64 reserved_658_703 : 46; + u64 octs : 48; /* W11 */ + u64 reserved_752_767 : 16; + u64 pkts : 48; /* W12 */ + u64 reserved_816_831 : 16; + u64 aged_drop_octs : 32; /* W13 */ + u64 aged_drop_pkts : 32; + u64 dropped_octs : 48; /* W14 */ + u64 reserved_944_959 : 16; + u64 dropped_pkts : 48; /* W15 */ + u64 reserved_1008_1023 : 16; +}; + +static_assert(sizeof(struct nix_cn20k_sq_ctx_s) == NIX_MAX_CTX_SIZE); + +struct nix_cn20k_cq_ctx_s { + u64 base : 64; /* W0 */ + u64 lbp_ena : 1; /* W1 */ + u64 lbpid_low : 3; + u64 bp_ena : 1; + u64 lbpid_med : 3; + u64 bpid : 9; + u64 lbpid_high : 3; + u64 qint_idx : 7; + u64 cq_err : 1; + u64 cint_idx : 7; + u64 avg_con : 9; + u64 wrptr : 20; + u64 tail : 20; /* W2 */ + u64 head : 20; + u64 avg_level : 8; + u64 update_time : 16; + u64 bp : 8; /* W3 */ + u64 drop : 8; + u64 drop_ena : 1; + u64 ena : 1; + u64 cpt_drop_err_en : 1; + u64 reserved_211_211 : 1; + u64 msh_dst : 11; + u64 msh_valid : 1; + u64 stash_thresh : 4; + u64 lbp_frac : 4; + u64 caching : 1; + u64 stashing : 1; + u64 reserved_234_235 : 2; + u64 qsize : 4; + u64 cq_err_int : 8; + u64 cq_err_int_ena : 8; + u64 bpid_ext : 2; /* W4 */ + u64 reserved_258_259 : 2; + u64 lbpid_ext : 2; + u64 reserved_262_319 : 58; + u64 reserved_320_383 : 64; /* W5 */ + u64 reserved_384_447 : 64; /* W6 */ + u64 reserved_448_511 : 64; /* W7 */ + u64 padding[8]; +}; + +static_assert(sizeof(struct nix_cn20k_cq_ctx_s) == NIX_MAX_CTX_SIZE); + +struct nix_cn20k_rq_ctx_s { + u64 ena : 1; + u64 sso_ena : 1; + u64 ipsech_ena : 1; + u64 ena_wqwd : 1; + u64 cq : 20; + u64 reserved_24_34 : 11; + u64 port_il4_dis : 1; + u64 port_ol4_dis : 1; + u64 lenerr_dis : 1; + u64 csum_il4_dis : 1; + u64 csum_ol4_dis : 1; + u64 len_il4_dis : 1; + u64 len_il3_dis : 1; + u64 len_ol4_dis : 1; + u64 len_ol3_dis : 1; + u64 wqe_aura : 20; + u64 spb_aura : 20; + u64
lpb_aura : 20; + u64 sso_grp : 10; + u64 sso_tt : 2; + u64 pb_caching : 2; + u64 wqe_caching : 1; + u64 xqe_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 lpb_drop_ena : 1; + u64 pb_stashing : 1; + u64 ipsecd_drop_en : 1; + u64 chi_ena : 1; + u64 reserved_125_127 : 3; + u64 band_prof_id_l : 10; + u64 sso_fc_ena : 1; + u64 policer_ena : 1; + u64 spb_sizem1 : 6; + u64 wqe_skip : 2; + u64 spb_high_sizem1 : 3; + u64 spb_ena : 1; + u64 lpb_sizem1 : 12; + u64 first_skip : 7; + u64 reserved_171_171 : 1; + u64 later_skip : 6; + u64 xqe_imm_size : 6; + u64 band_prof_id_h : 4; + u64 reserved_188_189 : 2; + u64 xqe_imm_copy : 1; + u64 xqe_hdr_split : 1; + u64 xqe_drop : 8; + u64 xqe_pass : 8; + u64 wqe_pool_drop : 8; + u64 wqe_pool_pass : 8; + u64 spb_aura_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_pool_pass : 8; + u64 lpb_aura_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_pool_pass : 8; + u64 reserved_288_291 : 4; + u64 rq_int : 8; + u64 rq_int_ena : 8; + u64 qint_idx : 7; + u64 reserved_315_319 : 5; + u64 ltag : 24; + u64 good_utag : 8; + u64 bad_utag : 8; + u64 flow_tagw : 6; + u64 ipsec_vwqe : 1; + u64 vwqe_ena : 1; + u64 vtime_wait : 8; + u64 max_vsize_exp : 4; + u64 vwqe_skip : 2; + u64 reserved_382_383 : 2; + u64 octs : 48; + u64 reserved_432_447 : 16; + u64 pkts : 48; + u64 reserved_496_511 : 16; + u64 drop_octs : 48; + u64 reserved_560_575 : 16; + u64 drop_pkts : 48; + u64 reserved_624_639 : 16; + u64 re_pkts : 48; + u64 reserved_688_703 : 16; + u64 reserved_704_767 : 64; + u64 reserved_768_831 : 64; + u64 reserved_832_895 : 64; + u64 reserved_896_959 : 64; + u64 reserved_960_1023 : 64; +}; + +static_assert(sizeof(struct nix_cn20k_rq_ctx_s) == NIX_MAX_CTX_SIZE); + #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 933073cd22804..01086c52e78d4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ 
b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -336,6 +336,8 @@ M(NIX_MCAST_GRP_UPDATE, 0x802d, nix_mcast_grp_update, \ nix_mcast_grp_update_req, \ nix_mcast_grp_update_rsp) \ M(NIX_LF_STATS, 0x802e, nix_lf_stats, nix_stats_req, nix_stats_rsp) \ +M(NIX_CN20K_AQ_ENQ, 0x802f, nix_cn20k_aq_enq, nix_cn20k_aq_enq_req, \ + nix_cn20k_aq_enq_rsp) \ /* MCS mbox IDs (range 0xA000 - 0xBFFF) */ \ M(MCS_ALLOC_RESOURCES, 0xa000, mcs_alloc_resources, mcs_alloc_rsrc_req, \ mcs_alloc_rsrc_rsp) \ @@ -940,6 +942,42 @@ struct nix_lf_free_req { u64 flags; }; +/* CN20K NIX AQ enqueue msg */ +struct nix_cn20k_aq_enq_req { + struct mbox_msghdr hdr; + u32 qidx; + u8 ctype; + u8 op; + union { + struct nix_cn20k_rq_ctx_s rq; + struct nix_cn20k_sq_ctx_s sq; + struct nix_cn20k_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + struct nix_bandprof_s prof; + }; + union { + struct nix_cn20k_rq_ctx_s rq_mask; + struct nix_cn20k_sq_ctx_s sq_mask; + struct nix_cn20k_cq_ctx_s cq_mask; + struct nix_rsse_s rss_mask; + struct nix_rx_mce_s mce_mask; + struct nix_bandprof_s prof_mask; + }; +}; + +struct nix_cn20k_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + struct nix_cn20k_rq_ctx_s rq; + struct nix_cn20k_sq_ctx_s sq; + struct nix_cn20k_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + struct nix_bandprof_s prof; + }; +}; + /* CN10K NIX AQ enqueue msg */ struct nix_cn10k_aq_enq_req { struct mbox_msghdr hdr; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b582833419232..e85dac2c806d9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -498,6 +498,14 @@ struct channel_fwdata { u8 reserved[RVU_CHANL_INFO_RESERVED]; }; +struct altaf_intr_notify { + unsigned long flr_pf_bmap[2]; + unsigned long flr_vf_bmap[2]; + unsigned long gint_paddr; + unsigned long gint_iova_addr; + unsigned long reserved[6]; +}; + struct rvu_fwdata { #define 
RVU_FWDATA_HEADER_MAGIC 0xCFDA /* Custom Firmware Data*/ #define RVU_FWDATA_VERSION 0x0001 @@ -517,7 +525,8 @@ struct rvu_fwdata { u32 ptp_ext_clk_rate; u32 ptp_ext_tstamp; struct channel_fwdata channel_data; -#define FWDATA_RESERVED_MEM 958 + struct altaf_intr_notify altaf_intr_info; +#define FWDATA_RESERVED_MEM 946 u64 reserved[FWDATA_RESERVED_MEM]; #define CGX_MAX 9 #define CGX_LMACS_MAX 4 @@ -648,6 +657,7 @@ struct rvu { struct mutex mbox_lock; /* Serialize mbox up and down msgs */ u16 rep_pcifunc; + bool altaf_ready; int rep_cnt; u16 *rep2pfvf_map; u8 rep_mode; @@ -1032,6 +1042,9 @@ void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc); int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf, int blkaddr, int nixlf); void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr); +int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp); + /* NPC APIs */ void rvu_npc_freemem(struct rvu *rvu); int rvu_npc_get_pkind(struct rvu *rvu, u16 pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 26dc0fbeafa67..d156d124f0796 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1019,6 +1019,12 @@ static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req, { struct nix_cn10k_aq_enq_req *aq_req; + if (is_cn20k(rvu->pdev)) { + *smq = ((struct nix_cn20k_aq_enq_req *)req)->sq.smq; + *smq_mask = ((struct nix_cn20k_aq_enq_req *)req)->sq_mask.smq; + return; + } + if (!is_rvu_otx2(rvu)) { aq_req = (struct nix_cn10k_aq_enq_req *)req; *smq = aq_req->sq.smq; @@ -1323,8 +1329,8 @@ static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw, return 0; } -static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, - struct nix_aq_enq_rsp *rsp) +int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) { 
struct nix_hw *nix_hw; int err, retries = 5; From 45229e9a9ab5edf089d4c16efdf690a06d11a9bf Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:39 +0530 Subject: [PATCH 398/867] octeontx2-af: Extend debugfs support for cn20k NIX Extend debugfs to display CN20K NIX send, receive and completion queue contexts. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-4-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/Makefile | 2 +- .../marvell/octeontx2/af/cn20k/debugfs.c | 134 ++++++++++++++++++ .../marvell/octeontx2/af/cn20k/debugfs.h | 24 ++++ .../marvell/octeontx2/af/rvu_debugfs.c | 17 +++ 4 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index cb77b978eda58..57eeaa116116d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -12,4 +12,4 @@ rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \ rvu_sdp.o rvu_npc_hash.o mcs.o mcs_rvu_if.o mcs_cnf10kb.o \ - rvu_rep.o cn20k/mbox_init.o cn20k/nix.o + rvu_rep.o cn20k/mbox_init.o cn20k/nix.o cn20k/debugfs.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c new file mode 100644 index 0000000000000..50b1bd1d2c865 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2024 Marvell. 
+ * + */ + +#include +#include +#include +#include + +#include "struct.h" +#include "debugfs.h" + +void print_nix_cn20k_sq_ctx(struct seq_file *m, + struct nix_cn20k_sq_ctx_s *sq_ctx) +{ + seq_printf(m, "W0: ena \t\t\t%d\nW0: qint_idx \t\t\t%d\n", + sq_ctx->ena, sq_ctx->qint_idx); + seq_printf(m, "W0: substream \t\t\t0x%03x\nW0: sdp_mcast \t\t\t%d\n", + sq_ctx->substream, sq_ctx->sdp_mcast); + seq_printf(m, "W0: cq \t\t\t\t%d\nW0: sqe_way_mask \t\t%d\n\n", + sq_ctx->cq, sq_ctx->sqe_way_mask); + + seq_printf(m, "W1: smq \t\t\t%d\nW1: cq_ena \t\t\t%d\nW1: xoff\t\t\t%d\n", + sq_ctx->smq, sq_ctx->cq_ena, sq_ctx->xoff); + seq_printf(m, "W1: sso_ena \t\t\t%d\nW1: smq_rr_weight\t\t%d\n", + sq_ctx->sso_ena, sq_ctx->smq_rr_weight); + seq_printf(m, "W1: default_chan\t\t%d\nW1: sqb_count\t\t\t%d\n\n", + sq_ctx->default_chan, sq_ctx->sqb_count); + + seq_printf(m, "W1: smq_rr_count_lb \t\t%d\n", sq_ctx->smq_rr_count_lb); + seq_printf(m, "W2: smq_rr_count_ub \t\t%d\n", sq_ctx->smq_rr_count_ub); + seq_printf(m, "W2: sqb_aura \t\t\t%d\nW2: sq_int \t\t\t%d\n", + sq_ctx->sqb_aura, sq_ctx->sq_int); + seq_printf(m, "W2: sq_int_ena \t\t\t%d\nW2: sqe_stype \t\t\t%d\n", + sq_ctx->sq_int_ena, sq_ctx->sqe_stype); + + seq_printf(m, "W3: max_sqe_size\t\t%d\nW3: cq_limit\t\t\t%d\n", + sq_ctx->max_sqe_size, sq_ctx->cq_limit); + seq_printf(m, "W3: lmt_dis \t\t\t%d\nW3: mnq_dis \t\t\t%d\n", + sq_ctx->lmt_dis, sq_ctx->mnq_dis); + seq_printf(m, "W3: smq_next_sq\t\t\t%d\nW3: smq_lso_segnum\t\t%d\n", + sq_ctx->smq_next_sq, sq_ctx->smq_lso_segnum); + seq_printf(m, "W3: tail_offset \t\t%d\nW3: smenq_offset\t\t%d\n", + sq_ctx->tail_offset, sq_ctx->smenq_offset); + seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n", + sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld); + + seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n", + sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend); + seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb); + seq_printf(m, "W5: tail_sqb 
\t\t\t%llx\n\n", sq_ctx->tail_sqb); + seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb); + seq_printf(m, "W7: smenq_next_sqb \t\t%llx\n\n", + sq_ctx->smenq_next_sqb); + + seq_printf(m, "W8: head_sqb\t\t\t%llx\n\n", sq_ctx->head_sqb); + + seq_printf(m, "W9: vfi_lso_total\t\t%d\n", sq_ctx->vfi_lso_total); + seq_printf(m, "W9: vfi_lso_sizem1\t\t%d\nW9: vfi_lso_sb\t\t\t%d\n", + sq_ctx->vfi_lso_sizem1, sq_ctx->vfi_lso_sb); + seq_printf(m, "W9: vfi_lso_mps\t\t\t%d\nW9: vfi_lso_vlan0_ins_ena\t%d\n", + sq_ctx->vfi_lso_mps, sq_ctx->vfi_lso_vlan0_ins_ena); + seq_printf(m, "W9: vfi_lso_vlan1_ins_ena\t%d\nW9: vfi_lso_vld \t\t%d\n\n", + sq_ctx->vfi_lso_vlan1_ins_ena, sq_ctx->vfi_lso_vld); + + seq_printf(m, "W10: scm_lso_rem \t\t%llu\n\n", + (u64)sq_ctx->scm_lso_rem); + seq_printf(m, "W11: octs \t\t\t%llu\n\n", (u64)sq_ctx->octs); + seq_printf(m, "W12: pkts \t\t\t%llu\n\n", (u64)sq_ctx->pkts); + seq_printf(m, "W13: aged_drop_octs \t\t\t%llu\n\n", + (u64)sq_ctx->aged_drop_octs); + seq_printf(m, "W13: aged_drop_pkts \t\t\t%llu\n\n", + (u64)sq_ctx->aged_drop_pkts); + seq_printf(m, "W14: dropped_octs \t\t%llu\n\n", + (u64)sq_ctx->dropped_octs); + seq_printf(m, "W15: dropped_pkts \t\t%llu\n\n", + (u64)sq_ctx->dropped_pkts); +} + +void print_nix_cn20k_cq_ctx(struct seq_file *m, + struct nix_cn20k_aq_enq_rsp *rsp) +{ + struct nix_cn20k_cq_ctx_s *cq_ctx = &rsp->cq; + + seq_printf(m, "W0: base \t\t\t%llx\n\n", cq_ctx->base); + + seq_printf(m, "W1: wrptr \t\t\t%llx\n", (u64)cq_ctx->wrptr); + seq_printf(m, "W1: avg_con \t\t\t%d\nW1: cint_idx \t\t\t%d\n", + cq_ctx->avg_con, cq_ctx->cint_idx); + seq_printf(m, "W1: cq_err \t\t\t%d\nW1: qint_idx \t\t\t%d\n", + cq_ctx->cq_err, cq_ctx->qint_idx); + seq_printf(m, "W1: bpid \t\t\t%d\nW1: bp_ena \t\t\t%d\n\n", + cq_ctx->bpid, cq_ctx->bp_ena); + + seq_printf(m, "W1: lbpid_high \t\t\t0x%03x\n", cq_ctx->lbpid_high); + seq_printf(m, "W1: lbpid_med \t\t\t0x%03x\n", cq_ctx->lbpid_med); + seq_printf(m, "W1: lbpid_low \t\t\t0x%03x\n",
cq_ctx->lbpid_low); + seq_printf(m, "(W1: lbpid) \t\t\t0x%03x\n", + cq_ctx->lbpid_high << 6 | cq_ctx->lbpid_med << 3 | + cq_ctx->lbpid_low); + seq_printf(m, "W1: lbp_ena \t\t\t\t%d\n\n", cq_ctx->lbp_ena); + + seq_printf(m, "W2: update_time \t\t%d\nW2:avg_level \t\t\t%d\n", + cq_ctx->update_time, cq_ctx->avg_level); + seq_printf(m, "W2: head \t\t\t%d\nW2:tail \t\t\t%d\n\n", + cq_ctx->head, cq_ctx->tail); + + seq_printf(m, "W3: cq_err_int_ena \t\t%d\nW3:cq_err_int \t\t\t%d\n", + cq_ctx->cq_err_int_ena, cq_ctx->cq_err_int); + seq_printf(m, "W3: qsize \t\t\t%d\nW3:stashing \t\t\t%d\n", + cq_ctx->qsize, cq_ctx->stashing); + + seq_printf(m, "W3: caching \t\t\t%d\n", cq_ctx->caching); + seq_printf(m, "W3: lbp_frac \t\t\t%d\n", cq_ctx->lbp_frac); + seq_printf(m, "W3: stash_thresh \t\t\t%d\n", + cq_ctx->stash_thresh); + + seq_printf(m, "W3: msh_valid \t\t\t%d\nW3:msh_dst \t\t\t%d\n", + cq_ctx->msh_valid, cq_ctx->msh_dst); + + seq_printf(m, "W3: cpt_drop_err_en \t\t\t%d\n", + cq_ctx->cpt_drop_err_en); + seq_printf(m, "W3: ena \t\t\t%d\n", + cq_ctx->ena); + seq_printf(m, "W3: drop_ena \t\t\t%d\nW3: drop \t\t\t%d\n", + cq_ctx->drop_ena, cq_ctx->drop); + seq_printf(m, "W3: bp \t\t\t\t%d\n\n", cq_ctx->bp); + + seq_printf(m, "W4: lbpid_ext \t\t\t\t%d\n\n", cq_ctx->lbpid_ext); + seq_printf(m, "W4: bpid_ext \t\t\t\t%d\n\n", cq_ctx->bpid_ext); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h new file mode 100644 index 0000000000000..9d3a98dc30005 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 CGX driver + * + * Copyright (C) 2024 Marvell. 
+ * + */ + +#ifndef DEBUFS_H +#define DEBUFS_H + +#include +#include +#include +#include + +#include "struct.h" +#include "../mbox.h" + +void print_nix_cn20k_sq_ctx(struct seq_file *m, + struct nix_cn20k_sq_ctx_s *sq_ctx); +void print_nix_cn20k_cq_ctx(struct seq_file *m, + struct nix_cn20k_aq_enq_rsp *rsp); + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 8375f18c8e074..eeca8cef79648 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -21,6 +21,8 @@ #include "rvu_npc_hash.h" #include "mcs.h" +#include "cn20k/debugfs.h" + #define DEBUGFS_DIR_NAME "octeontx2" enum { @@ -2009,10 +2011,16 @@ static void print_nix_sq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) struct nix_hw *nix_hw = m->private; struct rvu *rvu = nix_hw->rvu; + if (is_cn20k(rvu->pdev)) { + print_nix_cn20k_sq_ctx(m, (struct nix_cn20k_sq_ctx_s *)sq_ctx); + return; + } + if (!is_rvu_otx2(rvu)) { print_nix_cn10k_sq_ctx(m, (struct nix_cn10k_sq_ctx_s *)sq_ctx); return; } + seq_printf(m, "W0: sqe_way_mask \t\t%d\nW0: cq \t\t\t\t%d\n", sq_ctx->sqe_way_mask, sq_ctx->cq); seq_printf(m, "W0: sdp_mcast \t\t\t%d\nW0: substream \t\t\t0x%03x\n", @@ -2225,6 +2233,11 @@ static void print_nix_cq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) struct nix_hw *nix_hw = m->private; struct rvu *rvu = nix_hw->rvu; + if (is_cn20k(rvu->pdev)) { + print_nix_cn20k_cq_ctx(m, (struct nix_cn20k_aq_enq_rsp *)rsp); + return; + } + seq_printf(m, "W0: base \t\t\t%llx\n\n", cq_ctx->base); seq_printf(m, "W1: wrptr \t\t\t%llx\n", (u64)cq_ctx->wrptr); @@ -2254,6 +2267,7 @@ static void print_nix_cq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) cq_ctx->cq_err_int_ena, cq_ctx->cq_err_int); seq_printf(m, "W3: qsize \t\t\t%d\nW3:caching \t\t\t%d\n", cq_ctx->qsize, cq_ctx->caching); + seq_printf(m, "W3: substream \t\t\t0x%03x\nW3: ena \t\t\t%d\n", 
cq_ctx->substream, cq_ctx->ena); if (!is_rvu_otx2(rvu)) { @@ -3950,6 +3964,9 @@ static void rvu_dbg_cpt_init(struct rvu *rvu, int blkaddr) static const char *rvu_get_dbg_dir_name(struct rvu *rvu) { + if (is_cn20k(rvu->pdev)) + return "cn20k"; + if (!is_rvu_otx2(rvu)) return "cn10k"; else From 8a8b1301277405047f73e641eadf0555f502756a Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Sat, 25 Oct 2025 16:02:40 +0530 Subject: [PATCH 399/867] octeontx2-af: Add cn20k NPA block contexts New CN20K silicon has NPA hardware context structures different from previous silicons. Add NPA aura and pool context definitions for cn20k. Extend NPA context handling support to cn20k. Signed-off-by: Linu Cherian Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-5-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/Makefile | 3 +- .../ethernet/marvell/octeontx2/af/cn20k/npa.c | 21 +++ .../marvell/octeontx2/af/cn20k/struct.h | 135 ++++++++++++++++++ .../net/ethernet/marvell/octeontx2/af/mbox.h | 35 +++++ 4 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 57eeaa116116d..244de500963ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -12,4 +12,5 @@ rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \ rvu_sdp.o rvu_npc_hash.o mcs.o mcs_rvu_if.o mcs_cnf10kb.o \ - rvu_rep.o cn20k/mbox_init.o cn20k/nix.o cn20k/debugfs.o + rvu_rep.o cn20k/mbox_init.o cn20k/nix.o cn20k/debugfs.o \ + cn20k/npa.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c new file mode 100644 index 
0000000000000..fe8f926c8b750 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npa.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#include +#include + +#include "struct.h" +#include "../rvu.h" + +int rvu_mbox_handler_npa_cn20k_aq_enq(struct rvu *rvu, + struct npa_cn20k_aq_enq_req *req, + struct npa_cn20k_aq_enq_rsp *rsp) +{ + return rvu_npa_aq_enq_inst(rvu, (struct npa_aq_enq_req *)req, + (struct npa_aq_enq_rsp *)rsp); +} +EXPORT_SYMBOL(rvu_mbox_handler_npa_cn20k_aq_enq); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h index ff8f0d1809c8c..763f6cabd7c24 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/struct.h @@ -242,4 +242,139 @@ struct nix_cn20k_rq_ctx_s { static_assert(sizeof(struct nix_cn20k_rq_ctx_s) == NIX_MAX_CTX_SIZE); +struct npa_cn20k_aura_s { + u64 pool_addr; /* W0 */ + u64 ena : 1; /* W1 */ + u64 reserved_65 : 2; + u64 pool_caching : 1; + u64 reserved_68 : 16; + u64 avg_con : 9; + u64 reserved_93 : 1; + u64 pool_drop_ena : 1; + u64 aura_drop_ena : 1; + u64 bp_ena : 1; + u64 reserved_97_103 : 7; + u64 aura_drop : 8; + u64 shift : 6; + u64 reserved_118_119 : 2; + u64 avg_level : 8; + u64 count : 36; /* W2 */ + u64 reserved_164_167 : 4; + u64 bpid : 12; + u64 reserved_180_191 : 12; + u64 limit : 36; /* W3 */ + u64 reserved_228_231 : 4; + u64 bp : 7; + u64 reserved_239_243 : 5; + u64 fc_ena : 1; + u64 fc_up_crossing : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 reserved_252_255 : 4; + u64 fc_addr; /* W4 */ + u64 pool_drop : 8; /* W5 */ + u64 update_time : 16; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_363 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_371 : 1; + u64 err_qint_idx : 7; + u64 
reserved_379_383 : 5; + u64 thresh : 36; /* W6*/ + u64 rsvd_423_420 : 4; + u64 fc_msh_dst : 11; + u64 reserved_435_438 : 4; + u64 op_dpc_ena : 1; + u64 op_dpc_set : 5; + u64 reserved_445_445 : 1; + u64 stream_ctx : 1; + u64 unified_ctx : 1; + u64 reserved_448_511; /* W7 */ + u64 padding[8]; +}; + +static_assert(sizeof(struct npa_cn20k_aura_s) == NIX_MAX_CTX_SIZE); + +struct npa_cn20k_pool_s { + u64 stack_base; /* W0 */ + u64 ena : 1; + u64 nat_align : 1; + u64 reserved_66_67 : 2; + u64 stack_caching : 1; + u64 reserved_69_87 : 19; + u64 buf_offset : 12; + u64 reserved_100_103 : 4; + u64 buf_size : 12; + u64 reserved_116_119 : 4; + u64 ref_cnt_prof : 3; + u64 reserved_123_127 : 5; + u64 stack_max_pages : 32; + u64 stack_pages : 32; + uint64_t bp_0 : 7; + uint64_t bp_1 : 7; + uint64_t bp_2 : 7; + uint64_t bp_3 : 7; + uint64_t bp_4 : 7; + uint64_t bp_5 : 7; + uint64_t bp_6 : 7; + uint64_t bp_7 : 7; + uint64_t bp_ena_0 : 1; + uint64_t bp_ena_1 : 1; + uint64_t bp_ena_2 : 1; + uint64_t bp_ena_3 : 1; + uint64_t bp_ena_4 : 1; + uint64_t bp_ena_5 : 1; + uint64_t bp_ena_6 : 1; + uint64_t bp_ena_7 : 1; + u64 stack_offset : 4; + u64 reserved_260_263 : 4; + u64 shift : 6; + u64 reserved_270_271 : 2; + u64 avg_level : 8; + u64 avg_con : 9; + u64 fc_ena : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 fc_up_crossing : 1; + u64 reserved_297_299 : 3; + u64 update_time : 16; + u64 reserved_316_319 : 4; + u64 fc_addr; /* W5 */ + u64 ptr_start; /* W6 */ + u64 ptr_end; /* W7 */ + u64 bpid_0 : 12; + u64 reserved_524_535 : 12; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_555 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_563 : 1; + u64 err_qint_idx : 7; + u64 reserved_571_575 : 5; + u64 thresh : 36; + u64 rsvd_612_615 : 4; + u64 fc_msh_dst : 11; + u64 reserved_627_630 : 4; + u64 op_dpc_ena : 1; + u64 op_dpc_set : 5; + u64 reserved_637_637 : 1; + u64 stream_ctx : 1; + u64 reserved_639 : 1; + u64 
reserved_640_703; /* W10 */ + u64 reserved_704_767; /* W11 */ + u64 reserved_768_831; /* W12 */ + u64 reserved_832_895; /* W13 */ + u64 reserved_896_959; /* W14 */ + u64 reserved_960_1023; /* W15 */ +}; + +static_assert(sizeof(struct npa_cn20k_pool_s) == NIX_MAX_CTX_SIZE); + #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 01086c52e78d4..a3e273126e4e1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -203,6 +203,8 @@ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ M(NPA_LF_FREE, 0x401, npa_lf_free, msg_req, msg_rsp) \ M(NPA_AQ_ENQ, 0x402, npa_aq_enq, npa_aq_enq_req, npa_aq_enq_rsp) \ M(NPA_HWCTX_DISABLE, 0x403, npa_hwctx_disable, hwctx_disable_req, msg_rsp)\ +M(NPA_CN20K_AQ_ENQ, 0x404, npa_cn20k_aq_enq, npa_cn20k_aq_enq_req, \ + npa_cn20k_aq_enq_rsp) \ /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \ /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ /* CPT mbox IDs (range 0xA00 - 0xBFF) */ \ @@ -834,6 +836,39 @@ struct npa_aq_enq_rsp { }; }; +struct npa_cn20k_aq_enq_req { + struct mbox_msghdr hdr; + u32 aura_id; + u8 ctype; + u8 op; + union { + /* Valid when op == WRITE/INIT and ctype == AURA. + * LF fills the pool_id in aura.pool_addr. AF will translate + * the pool_id to pool context pointer. 
+ */ + struct npa_cn20k_aura_s aura; + /* Valid when op == WRITE/INIT and ctype == POOL */ + struct npa_cn20k_pool_s pool; + }; + /* Mask data when op == WRITE (1=write, 0=don't write) */ + union { + /* Valid when op == WRITE and ctype == AURA */ + struct npa_cn20k_aura_s aura_mask; + /* Valid when op == WRITE and ctype == POOL */ + struct npa_cn20k_pool_s pool_mask; + }; +}; + +struct npa_cn20k_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + /* Valid when op == READ and ctype == AURA */ + struct npa_cn20k_aura_s aura; + /* Valid when op == READ and ctype == POOL */ + struct npa_cn20k_pool_s pool; + }; +}; + /* Disable all contexts of type 'ctype' */ struct hwctx_disable_req { struct mbox_msghdr hdr; From e4a8e78aca5e9e1bc5dc0f69a069c04fc9af943e Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Sat, 25 Oct 2025 16:02:41 +0530 Subject: [PATCH 400/867] octeontx2-af: Extend debugfs support for cn20k NPA Extend debugfs to display CN20K NPA aura and pool contexts. Signed-off-by: Linu Cherian Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-6-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../marvell/octeontx2/af/cn20k/debugfs.c | 84 +++++++++++++++++++ .../marvell/octeontx2/af/cn20k/debugfs.h | 4 + .../marvell/octeontx2/af/rvu_debugfs.c | 10 +++ 3 files changed, 98 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c index 50b1bd1d2c865..498968bf4cf59 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -132,3 +132,87 @@ void print_nix_cn20k_cq_ctx(struct seq_file *m, seq_printf(m, "W4: lbpid_ext \t\t\t\t%d\n\n", cq_ctx->lbpid_ext); seq_printf(m, "W4: bpid_ext \t\t\t\t%d\n\n", cq_ctx->bpid_ext); } + +void print_npa_cn20k_aura_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp) +{ + struct npa_cn20k_aura_s *aura = &rsp->aura; + + 
seq_printf(m, "W0: Pool addr\t\t%llx\n", aura->pool_addr); + + seq_printf(m, "W1: ena\t\t\t%d\nW1: pool caching\t%d\n", + aura->ena, aura->pool_caching); + seq_printf(m, "W1: avg con\t\t%d\n", aura->avg_con); + seq_printf(m, "W1: pool drop ena\t%d\nW1: aura drop ena\t%d\n", + aura->pool_drop_ena, aura->aura_drop_ena); + seq_printf(m, "W1: bp_ena\t\t%d\nW1: aura drop\t\t%d\n", + aura->bp_ena, aura->aura_drop); + seq_printf(m, "W1: aura shift\t\t%d\nW1: avg_level\t\t%d\n", + aura->shift, aura->avg_level); + + seq_printf(m, "W2: count\t\t%llu\nW2: nix_bpid\t\t%d\n", + (u64)aura->count, aura->bpid); + + seq_printf(m, "W3: limit\t\t%llu\nW3: bp\t\t\t%d\nW3: fc_ena\t\t%d\n", + (u64)aura->limit, aura->bp, aura->fc_ena); + + seq_printf(m, "W3: fc_up_crossing\t%d\nW3: fc_stype\t\t%d\n", + aura->fc_up_crossing, aura->fc_stype); + seq_printf(m, "W3: fc_hyst_bits\t%d\n", aura->fc_hyst_bits); + + seq_printf(m, "W4: fc_addr\t\t%llx\n", aura->fc_addr); + + seq_printf(m, "W5: pool_drop\t\t%d\nW5: update_time\t\t%d\n", + aura->pool_drop, aura->update_time); + seq_printf(m, "W5: err_int \t\t%d\nW5: err_int_ena\t\t%d\n", + aura->err_int, aura->err_int_ena); + seq_printf(m, "W5: thresh_int\t\t%d\nW5: thresh_int_ena \t%d\n", + aura->thresh_int, aura->thresh_int_ena); + seq_printf(m, "W5: thresh_up\t\t%d\nW5: thresh_qint_idx\t%d\n", + aura->thresh_up, aura->thresh_qint_idx); + seq_printf(m, "W5: err_qint_idx \t%d\n", aura->err_qint_idx); + + seq_printf(m, "W6: thresh\t\t%llu\n", (u64)aura->thresh); + seq_printf(m, "W6: fc_msh_dst\t\t%d\n", aura->fc_msh_dst); +} + +void print_npa_cn20k_pool_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp) +{ + struct npa_cn20k_pool_s *pool = &rsp->pool; + + seq_printf(m, "W0: Stack base\t\t%llx\n", pool->stack_base); + + seq_printf(m, "W1: ena \t\t%d\nW1: nat_align \t\t%d\n", + pool->ena, pool->nat_align); + seq_printf(m, "W1: stack_caching\t%d\n", + pool->stack_caching); + seq_printf(m, "W1: buf_offset\t\t%d\nW1: buf_size\t\t%d\n", + 
pool->buf_offset, pool->buf_size); + + seq_printf(m, "W2: stack_max_pages \t%d\nW2: stack_pages\t\t%d\n", + pool->stack_max_pages, pool->stack_pages); + + seq_printf(m, "W4: stack_offset\t%d\nW4: shift\t\t%d\nW4: avg_level\t\t%d\n", + pool->stack_offset, pool->shift, pool->avg_level); + seq_printf(m, "W4: avg_con \t\t%d\nW4: fc_ena\t\t%d\nW4: fc_stype\t\t%d\n", + pool->avg_con, pool->fc_ena, pool->fc_stype); + seq_printf(m, "W4: fc_hyst_bits\t%d\nW4: fc_up_crossing\t%d\n", + pool->fc_hyst_bits, pool->fc_up_crossing); + seq_printf(m, "W4: update_time\t\t%d\n", pool->update_time); + + seq_printf(m, "W5: fc_addr\t\t%llx\n", pool->fc_addr); + + seq_printf(m, "W6: ptr_start\t\t%llx\n", pool->ptr_start); + + seq_printf(m, "W7: ptr_end\t\t%llx\n", pool->ptr_end); + + seq_printf(m, "W8: err_int\t\t%d\nW8: err_int_ena\t\t%d\n", + pool->err_int, pool->err_int_ena); + seq_printf(m, "W8: thresh_int\t\t%d\n", pool->thresh_int); + seq_printf(m, "W8: thresh_int_ena\t%d\nW8: thresh_up\t\t%d\n", + pool->thresh_int_ena, pool->thresh_up); + seq_printf(m, "W8: thresh_qint_idx\t%d\nW8: err_qint_idx\t%d\n", + pool->thresh_qint_idx, pool->err_qint_idx); + seq_printf(m, "W8: fc_msh_dst\t\t%d\n", pool->fc_msh_dst); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h index 9d3a98dc30005..a2e3a2cd6edb3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h @@ -20,5 +20,9 @@ void print_nix_cn20k_sq_ctx(struct seq_file *m, struct nix_cn20k_sq_ctx_s *sq_ctx); void print_nix_cn20k_cq_ctx(struct seq_file *m, struct nix_cn20k_aq_enq_rsp *rsp); +void print_npa_cn20k_aura_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp); +void print_npa_cn20k_pool_ctx(struct seq_file *m, + struct npa_cn20k_aq_enq_rsp *rsp); #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index eeca8cef79648..c55a0f15380d0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1103,6 +1103,11 @@ static void print_npa_aura_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) struct npa_aura_s *aura = &rsp->aura; struct rvu *rvu = m->private; + if (is_cn20k(rvu->pdev)) { + print_npa_cn20k_aura_ctx(m, (struct npa_cn20k_aq_enq_rsp *)rsp); + return; + } + seq_printf(m, "W0: Pool addr\t\t%llx\n", aura->pool_addr); seq_printf(m, "W1: ena\t\t\t%d\nW1: pool caching\t%d\n", @@ -1151,6 +1156,11 @@ static void print_npa_pool_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) struct npa_pool_s *pool = &rsp->pool; struct rvu *rvu = m->private; + if (is_cn20k(rvu->pdev)) { + print_npa_cn20k_pool_ctx(m, (struct npa_cn20k_aq_enq_rsp *)rsp); + return; + } + seq_printf(m, "W0: Stack base\t\t%llx\n", pool->stack_base); seq_printf(m, "W1: ena \t\t%d\nW1: nat_align \t\t%d\n", From a861e5809f3e7ecbfde09546f6073da34bde1b47 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Sat, 25 Oct 2025 16:02:42 +0530 Subject: [PATCH 401/867] octeontx2-af: Skip NDC operations for cn20k For cn20k, NPA block doesn't use the general purpose NDC (Near Coprocessor Bus Data cache Unit) for caching, hence skip the NDC related operations. Also refactor NDC configuration code to a helper function. 
Signed-off-by: Linu Cherian Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-7-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../marvell/octeontx2/af/rvu_debugfs.c | 3 ++ .../ethernet/marvell/octeontx2/af/rvu_npa.c | 29 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index c55a0f15380d0..8ab82700e826d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -2808,6 +2808,9 @@ static void rvu_dbg_npa_init(struct rvu *rvu) &rvu_dbg_npa_aura_ctx_fops); debugfs_create_file("pool_ctx", 0600, rvu->rvu_dbg.npa, rvu, &rvu_dbg_npa_pool_ctx_fops); + + if (is_cn20k(rvu->pdev)) /* NDC not applicable to cn20k */ + return; debugfs_create_file("ndc_cache", 0600, rvu->rvu_dbg.npa, rvu, &rvu_dbg_npa_ndc_cache_fops); debugfs_create_file("ndc_hits_miss", 0600, rvu->rvu_dbg.npa, rvu, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c index 4f5ca5ab13a40..e2a33e46b48ad 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c @@ -464,6 +464,23 @@ int rvu_mbox_handler_npa_lf_free(struct rvu *rvu, struct msg_req *req, return 0; } +static void npa_aq_ndc_config(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + + if (is_cn20k(rvu->pdev)) /* NDC not applicable to cn20k */ + return; + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); + cfg &= ~0x03DULL; +#ifdef CONFIG_NDC_DIS_DYNAMIC_CACHING + /* Disable caching of stack pages */ + cfg |= 0x10ULL; +#endif + rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); +} + static int npa_aq_init(struct rvu *rvu, struct rvu_block *block) { u64 cfg; @@ -479,14 +496,7 @@ static int npa_aq_init(struct rvu
*rvu, struct rvu_block *block) rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); #endif - /* Do not bypass NDC cache */ - cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); - cfg &= ~0x03DULL; -#ifdef CONFIG_NDC_DIS_DYNAMIC_CACHING - /* Disable caching of stack pages */ - cfg |= 0x10ULL; -#endif - rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); + npa_aq_ndc_config(rvu, block); /* For CN10K NPA BATCH DMA set 35 cache lines */ if (!is_rvu_otx2(rvu)) { @@ -567,6 +577,9 @@ int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr) int bank, max_bank, line, max_line, err; u64 reg, ndc_af_const; + if (is_cn20k(rvu->pdev)) /* NDC not applicable to cn20k */ + return 0; + /* Set the ENABLE bit(63) to '0' */ reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL); rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0)); From d322fbd1720382a37ec4d2cf3f3eda7e59439d40 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Sat, 25 Oct 2025 16:02:43 +0530 Subject: [PATCH 402/867] octeontx2-pf: Initialize cn20k specific aura and pool contexts With new CN20K NPA pool and aura contexts supported in AF driver this patch modifies PF driver to use new NPA contexts. Implement new hw_ops for initializing aura and pool contexts for all the silicons.
Signed-off-by: Linu Cherian Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-8-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/nic/cn10k.c | 4 + .../ethernet/marvell/octeontx2/nic/cn20k.c | 186 ++++++++++++++++-- .../marvell/octeontx2/nic/otx2_common.c | 14 ++ .../marvell/octeontx2/nic/otx2_common.h | 10 + 4 files changed, 203 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index bec7d5b4d7cc0..cab157aac2517 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -15,6 +15,8 @@ static struct dev_hw_ops otx2_hw_ops = { .aura_freeptr = otx2_aura_freeptr, .refill_pool_ptrs = otx2_refill_pool_ptrs, .pfaf_mbox_intr_handler = otx2_pfaf_mbox_intr_handler, + .aura_aq_init = otx2_aura_aq_init, + .pool_aq_init = otx2_pool_aq_init, }; static struct dev_hw_ops cn10k_hw_ops = { @@ -23,6 +25,8 @@ static struct dev_hw_ops cn10k_hw_ops = { .aura_freeptr = cn10k_aura_freeptr, .refill_pool_ptrs = cn10k_refill_pool_ptrs, .pfaf_mbox_intr_handler = otx2_pfaf_mbox_intr_handler, + .aura_aq_init = otx2_aura_aq_init, + .pool_aq_init = otx2_pool_aq_init, }; void otx2_init_hw_ops(struct otx2_nic *pfvf) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c index ec8cde98076dc..6063025824ec5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c @@ -10,17 +10,6 @@ #include "otx2_struct.h" #include "cn10k.h" -static struct dev_hw_ops cn20k_hw_ops = { - .pfaf_mbox_intr_handler = cn20k_pfaf_mbox_intr_handler, - .vfaf_mbox_intr_handler = cn20k_vfaf_mbox_intr_handler, - .pfvf_mbox_intr_handler = cn20k_pfvf_mbox_intr_handler, -}; - -void cn20k_init(struct otx2_nic *pfvf) -{ - pfvf->hw_ops = &cn20k_hw_ops; -} 
-EXPORT_SYMBOL(cn20k_init); /* CN20K mbox AF => PFx irq handler */ irqreturn_t cn20k_pfaf_mbox_intr_handler(int irq, void *pf_irq) { @@ -250,3 +239,178 @@ int cn20k_register_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs) return 0; } + +#define RQ_BP_LVL_AURA (255 - ((85 * 256) / 100)) /* BP when 85% is full */ + +static u8 cn20k_aura_bpid_idx(struct otx2_nic *pfvf, int aura_id) +{ +#ifdef CONFIG_DCB + return pfvf->queue_to_pfc_map[aura_id]; +#else + return 0; +#endif +} + +static int cn20k_aura_aq_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) +{ + struct npa_cn20k_aq_enq_req *aq; + struct otx2_pool *pool; + u8 bpid_idx; + int err; + + pool = &pfvf->qset.pool[pool_id]; + + /* Allocate memory for HW to update Aura count. + * Alloc one cache line, so that it fits all FC_STYPE modes. + */ + if (!pool->fc_addr) { + err = qmem_alloc(pfvf->dev, &pool->fc_addr, 1, OTX2_ALIGN); + if (err) + return err; + } + + /* Initialize this aura's context via AF */ + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) { + /* Shared mbox memory buffer is full, flush it and retry */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + return err; + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + } + + aq->aura_id = aura_id; + + /* Will be filled by AF with correct pool context address */ + aq->aura.pool_addr = pool_id; + aq->aura.pool_caching = 1; + aq->aura.shift = ilog2(numptrs) - 8; + aq->aura.count = numptrs; + aq->aura.limit = numptrs; + aq->aura.avg_level = 255; + aq->aura.ena = 1; + aq->aura.fc_ena = 1; + aq->aura.fc_addr = pool->fc_addr->iova; + aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ + + /* Enable backpressure for RQ aura */ + if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) { + aq->aura.bp_ena = 0; + /* If NIX1 LF is attached then specify NIX1_RX. 
+ * + * Below NPA_AURA_S[BP_ENA] is set according to the + * NPA_BPINTF_E enumeration given as: + * 0x0 + a*0x1 where 'a' is 0 for NIX0_RX and 1 for NIX1_RX so + * NIX0_RX is 0x0 + 0*0x1 = 0 + * NIX1_RX is 0x0 + 1*0x1 = 1 + * But in HRM it is given that + * "NPA_AURA_S[BP_ENA](w1[33:32]) - Enable aura backpressure to + * NIX-RX based on [BP] level. One bit per NIX-RX; index + * enumerated by NPA_BPINTF_E." + */ + if (pfvf->nix_blkaddr == BLKADDR_NIX1) + aq->aura.bp_ena = 1; + + bpid_idx = cn20k_aura_bpid_idx(pfvf, aura_id); + aq->aura.bpid = pfvf->bpid[bpid_idx]; + + /* Set backpressure level for RQ's Aura */ + aq->aura.bp = RQ_BP_LVL_AURA; + } + + /* Fill AQ info */ + aq->ctype = NPA_AQ_CTYPE_AURA; + aq->op = NPA_AQ_INSTOP_INIT; + + return 0; +} + +static int cn20k_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, + int type) +{ + struct page_pool_params pp_params = { 0 }; + struct npa_cn20k_aq_enq_req *aq; + struct otx2_pool *pool; + int err, sz; + + pool = &pfvf->qset.pool[pool_id]; + /* Alloc memory for stack which is used to store buffer pointers */ + err = qmem_alloc(pfvf->dev, &pool->stack, + stack_pages, pfvf->hw.stack_pg_bytes); + if (err) + return err; + + pool->rbsize = buf_size; + + /* Initialize this pool's context via AF */ + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) { + /* Shared mbox memory buffer is full, flush it and retry */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + qmem_free(pfvf->dev, pool->stack); + return err; + } + aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); + if (!aq) { + qmem_free(pfvf->dev, pool->stack); + return -ENOMEM; + } + } + + aq->aura_id = pool_id; + aq->pool.stack_base = pool->stack->iova; + aq->pool.stack_caching = 1; + aq->pool.ena = 1; + aq->pool.buf_size = buf_size / 128; + aq->pool.stack_max_pages = stack_pages; + aq->pool.shift = ilog2(numptrs) - 8; + aq->pool.ptr_start = 0; + aq->pool.ptr_end = ~0ULL; + + /* Fill AQ info */ + 
aq->ctype = NPA_AQ_CTYPE_POOL; + aq->op = NPA_AQ_INSTOP_INIT; + + if (type != AURA_NIX_RQ) { + pool->page_pool = NULL; + return 0; + } + + sz = ALIGN(ALIGN(SKB_DATA_ALIGN(buf_size), OTX2_ALIGN), PAGE_SIZE); + pp_params.order = get_order(sz); + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = pfvf->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pool->page_pool = page_pool_create(&pp_params); + if (IS_ERR(pool->page_pool)) { + netdev_err(pfvf->netdev, "Creation of page pool failed\n"); + return PTR_ERR(pool->page_pool); + } + + return 0; +} + +static struct dev_hw_ops cn20k_hw_ops = { + .pfaf_mbox_intr_handler = cn20k_pfaf_mbox_intr_handler, + .vfaf_mbox_intr_handler = cn20k_vfaf_mbox_intr_handler, + .pfvf_mbox_intr_handler = cn20k_pfvf_mbox_intr_handler, + .sq_aq_init = cn10k_sq_aq_init, + .sqe_flush = cn10k_sqe_flush, + .aura_freeptr = cn10k_aura_freeptr, + .refill_pool_ptrs = cn10k_refill_pool_ptrs, + .aura_aq_init = cn20k_aura_aq_init, + .pool_aq_init = cn20k_pool_aq_init, +}; + +void cn20k_init(struct otx2_nic *pfvf) +{ + pfvf->hw_ops = &cn20k_hw_ops; +} +EXPORT_SYMBOL(cn20k_init); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index aff17c37ddde0..3378be87a4734 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1368,6 +1368,13 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, int pool_id, int numptrs) +{ + return pfvf->hw_ops->aura_aq_init(pfvf, aura_id, pool_id, + numptrs); +} + +int otx2_aura_aq_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) { struct npa_aq_enq_req *aq; struct otx2_pool *pool; @@ -1445,6 +1452,13 @@ int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, int 
stack_pages, int numptrs, int buf_size, int type) +{ + return pfvf->hw_ops->pool_aq_init(pfvf, pool_id, stack_pages, numptrs, + buf_size, type); +} + +int otx2_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, int type) { struct page_pool_params pp_params = { 0 }; struct xsk_buff_pool *xsk_pool; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index ec26d1b6c789f..e616a727a3a91 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -375,6 +376,11 @@ struct dev_hw_ops { irqreturn_t (*pfaf_mbox_intr_handler)(int irq, void *pf_irq); irqreturn_t (*vfaf_mbox_intr_handler)(int irq, void *pf_irq); irqreturn_t (*pfvf_mbox_intr_handler)(int irq, void *pf_irq); + int (*aura_aq_init)(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); + int (*pool_aq_init)(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, + int type); }; #define CN10K_MCS_SA_PER_SC 4 @@ -1059,6 +1065,10 @@ irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq); int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura); int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx); int otx2_set_hw_capabilities(struct otx2_nic *pfvf); +int otx2_aura_aq_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); +int otx2_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size, int type); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); From 81f12533572d929cd503b908593c7cfd5d13717e Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:44 +0530 Subject: [PATCH 403/867] octeontx2-pf: Initialize new NIX SQ context for cn20k cn20k has different NIX context for send queue hence use the new cn20k mailbox 
to init SQ context. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-9-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/nic/cn20k.c | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c index 6063025824ec5..a60f8cf53febb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c @@ -397,11 +397,45 @@ static int cn20k_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, return 0; } +static int cn20k_sq_aq_init(void *dev, u16 qidx, u8 chan_offset, u16 sqb_aura) +{ + struct nix_cn20k_aq_enq_req *aq; + struct otx2_nic *pfvf = dev; + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_cn20k_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->sq.cq = pfvf->hw.rx_queues + qidx; + aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ + aq->sq.cq_ena = 1; + aq->sq.ena = 1; + aq->sq.smq = otx2_get_smq_idx(pfvf, qidx); + aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); + aq->sq.default_chan = pfvf->hw.tx_chan_base + chan_offset; + aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ + aq->sq.sqb_aura = sqb_aura; + aq->sq.sq_int_ena = NIX_SQINT_BITS; + aq->sq.qint_idx = 0; + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to maintain to avoid CQ overflow. 
+ */ + aq->sq.cq_limit = (SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + static struct dev_hw_ops cn20k_hw_ops = { .pfaf_mbox_intr_handler = cn20k_pfaf_mbox_intr_handler, .vfaf_mbox_intr_handler = cn20k_vfaf_mbox_intr_handler, .pfvf_mbox_intr_handler = cn20k_pfvf_mbox_intr_handler, - .sq_aq_init = cn10k_sq_aq_init, + .sq_aq_init = cn20k_sq_aq_init, .sqe_flush = cn10k_sqe_flush, .aura_freeptr = cn10k_aura_freeptr, .refill_pool_ptrs = cn10k_refill_pool_ptrs, From f7774633cf251b66d68695efe230fb0003a06d0a Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:45 +0530 Subject: [PATCH 404/867] octeontx2-af: Accommodate more bandwidth profiles for cn20k CN20K has 16k of leaf profiles, 2k of middle profiles and 256 of top profiles. This patch modifies existing receive queue and bandwidth profile context structures to accommodate additional profiles of cn20k. 
Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-10-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 22 ++++++++++++++----- .../marvell/octeontx2/af/rvu_struct.h | 6 +++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index d156d124f0796..2f485a930edd1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -5818,6 +5818,8 @@ static void nix_ipolicer_freemem(struct rvu *rvu, struct nix_hw *nix_hw) } } +#define NIX_BW_PROF_HI_MASK GENMASK(10, 7) + static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req, struct nix_hw *nix_hw, u16 pcifunc) { @@ -5856,7 +5858,8 @@ static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req, return -EINVAL; ipolicer = &nix_hw->ipolicer[hi_layer]; - prof_idx = req->prof.band_prof_id; + prof_idx = FIELD_PREP(NIX_BW_PROF_HI_MASK, req->prof.band_prof_id_h); + prof_idx |= req->prof.band_prof_id; if (prof_idx >= ipolicer->band_prof.max || ipolicer->pfvf_map[prof_idx] != pcifunc) return -EINVAL; @@ -6021,8 +6024,10 @@ static int nix_ipolicer_map_leaf_midprofs(struct rvu *rvu, aq_req->op = NIX_AQ_INSTOP_WRITE; aq_req->qidx = leaf_prof; - aq_req->prof.band_prof_id = mid_prof; + aq_req->prof.band_prof_id = mid_prof & 0x7F; aq_req->prof_mask.band_prof_id = GENMASK(6, 0); + aq_req->prof.band_prof_id_h = FIELD_GET(NIX_BW_PROF_HI_MASK, mid_prof); + aq_req->prof_mask.band_prof_id_h = GENMASK(3, 0); aq_req->prof.hl_en = 1; aq_req->prof_mask.hl_en = 1; @@ -6031,6 +6036,8 @@ static int nix_ipolicer_map_leaf_midprofs(struct rvu *rvu, (struct nix_aq_enq_rsp *)aq_rsp); } +#define NIX_RQ_PROF_HI_MASK GENMASK(13, 10) + int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, u16 rq_idx, u16 match_id) { @@ -6062,7 +6069,8 @@ int 
rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, return 0; /* Get the bandwidth profile ID mapped to this RQ */ - leaf_prof = aq_rsp.rq.band_prof_id; + leaf_prof = FIELD_PREP(NIX_RQ_PROF_HI_MASK, aq_rsp.rq.band_prof_id_h); + leaf_prof |= aq_rsp.rq.band_prof_id; ipolicer = &nix_hw->ipolicer[BAND_PROF_LEAF_LAYER]; ipolicer->match_id[leaf_prof] = match_id; @@ -6100,7 +6108,10 @@ int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, * to different RQs and marked with same match_id * are rate limited in a aggregate fashion */ - mid_prof = aq_rsp.prof.band_prof_id; + mid_prof = FIELD_PREP(NIX_BW_PROF_HI_MASK, + aq_rsp.prof.band_prof_id_h); + mid_prof |= aq_rsp.prof.band_prof_id; + rc = nix_ipolicer_map_leaf_midprofs(rvu, nix_hw, &aq_req, &aq_rsp, leaf_prof, mid_prof); @@ -6222,7 +6233,8 @@ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw, if (!aq_rsp.prof.hl_en) return; - mid_prof = aq_rsp.prof.band_prof_id; + mid_prof = FIELD_PREP(NIX_BW_PROF_HI_MASK, aq_rsp.prof.band_prof_id_h); + mid_prof |= aq_rsp.prof.band_prof_id; ipolicer = &nix_hw->ipolicer[BAND_PROF_MID_LAYER]; ipolicer->ref_count[mid_prof]--; /* If ref_count is zero, free mid layer profile */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 8d41cb8f85ef5..8e868f815de1f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -419,7 +419,8 @@ struct nix_cn10k_rq_ctx_s { u64 rsvd_171 : 1; u64 later_skip : 6; u64 xqe_imm_size : 6; - u64 rsvd_189_184 : 6; + u64 band_prof_id_h : 4; + u64 rsvd_189_188 : 2; u64 xqe_imm_copy : 1; u64 xqe_hdr_split : 1; u64 xqe_drop : 8; /* W3 */ @@ -757,7 +758,8 @@ struct nix_bandprof_s { uint64_t rc_action : 2; uint64_t meter_algo : 2; uint64_t band_prof_id : 7; - uint64_t reserved_111_118 : 8; + uint64_t band_prof_id_h : 4; + uint64_t reserved_115_118 : 4; uint64_t hl_en : 1; uint64_t 
reserved_120_127 : 8; uint64_t ts : 48; /* W2 */ From 47a1208776d7f57170e9061465380777a7722eb6 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:46 +0530 Subject: [PATCH 405/867] octeontx2-af: Display new bandwidth profiles too in debugfs Consider the new profiles of cn20k too while displaying bandwidth profile contexts in debugfs. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-11-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 8ab82700e826d..7370812ece2ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -2121,7 +2121,9 @@ static void print_nix_cn10k_rq_ctx(struct seq_file *m, seq_printf(m, "W1: ipsecd_drop_ena \t\t%d\nW1: chi_ena \t\t\t%d\n\n", rq_ctx->ipsecd_drop_ena, rq_ctx->chi_ena); - seq_printf(m, "W2: band_prof_id \t\t%d\n", rq_ctx->band_prof_id); + seq_printf(m, "W2: band_prof_id \t\t%d\n", + (u16)rq_ctx->band_prof_id_h << 10 | rq_ctx->band_prof_id); + seq_printf(m, "W2: policer_ena \t\t%d\n", rq_ctx->policer_ena); seq_printf(m, "W2: spb_sizem1 \t\t\t%d\n", rq_ctx->spb_sizem1); seq_printf(m, "W2: wqe_skip \t\t\t%d\nW2: sqb_ena \t\t\t%d\n", @@ -2639,7 +2641,10 @@ static void print_band_prof_ctx(struct seq_file *m, (prof->rc_action == 1) ? 
"DROP" : "RED"; seq_printf(m, "W1: rc_action\t\t%s\n", str); seq_printf(m, "W1: meter_algo\t\t%d\n", prof->meter_algo); - seq_printf(m, "W1: band_prof_id\t%d\n", prof->band_prof_id); + + seq_printf(m, "W1: band_prof_id\t%d\n", + (u16)prof->band_prof_id_h << 7 | prof->band_prof_id); + seq_printf(m, "W1: hl_en\t\t%d\n", prof->hl_en); seq_printf(m, "W2: ts\t\t\t%lld\n", (u64)prof->ts); From 33d8a1f45729b972e5e633a4296507be2e1866c3 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Oct 2025 16:02:47 +0530 Subject: [PATCH 406/867] octeontx2-pf: Use new bandwidth profiles in receive queue Receive queue points to a bandwidth profile for rate limiting. Since cn20k has additional bandwidth profiles use them too while mapping receive queue to bandwidth profile. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1761388367-16579-12-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index cab157aac2517..3e1bf22cba69c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -341,6 +341,12 @@ int cn10k_map_unmap_rq_policer(struct otx2_nic *pfvf, int rq_idx, aq->rq.band_prof_id = policer; aq->rq_mask.band_prof_id = GENMASK(9, 0); + /* If policer id is greater than 1023 then it implies hardware supports + * more leaf profiles. In that case use band_prof_id_h for 4 MSBs. 
+ */ + aq->rq.band_prof_id_h = policer >> 10; + aq->rq_mask.band_prof_id_h = GENMASK(3, 0); + /* Fill AQ info */ aq->qidx = rq_idx; aq->ctype = NIX_AQ_CTYPE_RQ; From 320d80eeb22219bc1dd14780113c36e11dad7c04 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 13 Oct 2025 10:59:48 +0200 Subject: [PATCH 407/867] netfilter: nf_tables: use C99 struct initializer for nft_set_iter Use C99 struct initializer for nft_set_iter, simplifying the code and preventing future errors due to uninitialized fields if new fields are added to the struct. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 34 ++++++++++++++++------------------ net/netfilter/nft_lookup.c | 13 +++++-------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index eed434e0a9702..f3de2f9bbebf1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5770,7 +5770,11 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { struct nft_set_binding *i; - struct nft_set_iter iter; + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .type = NFT_ITER_UPDATE, + .fn = nf_tables_bind_check_setelem, + }; if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -5785,13 +5789,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, goto bind; } - iter.genmask = nft_genmask_next(ctx->net); - iter.type = NFT_ITER_UPDATE; - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_bind_check_setelem; - set->ops->walk(ctx, set, &iter); if (!iter.err) iter.err = nft_set_catchall_bind_check(ctx, set); @@ -6195,7 +6192,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; - struct nft_set_dump_args args; + struct 
nft_set_dump_args args = { + .cb = cb, + .skb = skb, + .reset = dump_ctx->reset, + .iter = { + .genmask = nft_genmask_cur(net), + .type = NFT_ITER_READ, + .skip = cb->args[0], + .fn = nf_tables_dump_setelem, + }, + }; bool set_found = false; struct nlmsghdr *nlh; struct nlattr *nest; @@ -6246,15 +6253,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - args.cb = cb; - args.skb = skb; - args.reset = dump_ctx->reset; - args.iter.genmask = nft_genmask_cur(net); - args.iter.type = NFT_ITER_READ; - args.iter.skip = cb->args[0]; - args.iter.count = 0; - args.iter.err = 0; - args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&dump_ctx->ctx, set, &args.iter); if (!args.iter.err && args.iter.count == cb->args[0]) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 58c5b14889c47..fc2d7c5d83c8e 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -246,19 +246,16 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); - struct nft_set_iter iter; + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .type = NFT_ITER_UPDATE, + .fn = nft_setelem_validate, + }; if (!(priv->set->flags & NFT_SET_MAP) || priv->set->dtype != NFT_DATA_VERDICT) return 0; - iter.genmask = nft_genmask_next(ctx->net); - iter.type = NFT_ITER_UPDATE; - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nft_setelem_validate; - priv->set->ops->walk(ctx, priv->set, &iter); if (!iter.err) iter.err = nft_set_catchall_validate(ctx, priv->set); From 2b749f257645c54f8659bddbdb5b2ede999bec00 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 21 Sep 2025 17:45:30 +0200 Subject: [PATCH 408/867] netfilter: conntrack: disable 0 value for conntrack_max setting Undocumented historical artifact inherited from ip_conntrack. 
If value is 0, then no limit is applied at all, conntrack table can grow to huge value, only limited by size of conntrack hashes and the kernel-internal upper limit on the hash chain lengths. This feature makes no sense; users can just set conntrack_max=2147483647 (INT_MAX). Disallow a 0 value. This will make it slightly easier to allow per-netns constraints for this value in a future patch. Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 344f88295976d..0b95f226f2111 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1668,7 +1668,7 @@ __nf_conntrack_alloc(struct net *net, /* We don't want any race condition at early drop stage */ ct_count = atomic_inc_return(&cnet->count); - if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) { + if (unlikely(ct_count > nf_conntrack_max)) { if (!early_drop(net, hash)) { if (!conntrack_gc_work.early_drop) conntrack_gc_work.early_drop = true; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 708b79380f047..207b240b14e5d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -648,7 +648,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, [NF_SYSCTL_CT_COUNT] = { @@ -929,7 +929,7 @@ static struct ctl_table nf_ct_netfilter_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, }; From 57347d58a4011551e7d0e030f2f12e4d1a28feb6 Mon Sep 17 00:00:00 2001 From: "caivive (Weibiao Tu)" Date: Thu, 28 Nov 2024 20:52:04 +0800 
Subject: [PATCH 409/867] netfilter: fix typo in nf_conntrack_l4proto.h comment In the comment for nf_conntrack_l4proto.h, the word "nfnetink" was incorrectly spelled. It has been corrected to "nfnetlink". Fixes a typo to enhance readability and ensure consistency. Signed-off-by: caivive (Weibiao Tu) Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 6929f8daf1ed0..cd5020835a6d3 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -30,7 +30,7 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* convert protoinfo to nfnetink attributes */ + /* convert protoinfo to nfnetlink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct, bool destroy); From aef3cdb47bbbef9fea9512ed6c02d64394449d53 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 27 Oct 2025 23:48:55 +0100 Subject: [PATCH 410/867] net/smc: make wr buffer count configurable Think SMC_WR_BUF_CNT_SEND := SMC_WR_BUF_CNT used in send context and SMC_WR_BUF_CNT_RECV := 3 * SMC_WR_BUF_CNT used in recv context. Those get replaced with lgr->max_send_wr and lgr->max_recv_wr respectively. Please note that although with the default sysctl values qp_attr.cap.max_send_wr == qp_attr.cap.max_recv_wr is maintained but can not be assumed to be generally true any more. I see no downside to that, but my confidence level is rather modest. 
Signed-off-by: Halil Pasic Reviewed-by: Sidraya Jayagond Reviewed-by: Dust Li Tested-by: Mahanta Jambigi Link: https://patch.msgid.link/20251027224856.2970019-2-pasic@linux.ibm.com Signed-off-by: Paolo Abeni --- Documentation/networking/smc-sysctl.rst | 36 +++++++++++++++++++++++++ include/net/netns/smc.h | 2 ++ net/smc/smc_core.h | 6 +++++ net/smc/smc_ib.c | 10 +++---- net/smc/smc_llc.c | 2 ++ net/smc/smc_sysctl.c | 22 +++++++++++++++ net/smc/smc_sysctl.h | 2 ++ net/smc/smc_wr.c | 31 ++++++++++----------- net/smc/smc_wr.h | 2 -- 9 files changed, 91 insertions(+), 22 deletions(-) diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index a874d007f2db5..337ac2be167e1 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -71,3 +71,39 @@ smcr_max_conns_per_lgr - INTEGER acceptable value ranges from 16 to 255. Only for SMC-R v2.1 and later. Default: 255 + +smcr_max_send_wr - INTEGER + So-called work request buffers are SMCR link (and RDMA queue pair) level + resources necessary for performing RDMA operations. Since up to 255 + connections can share a link group and thus also a link and the number + of the work request buffers is decided when the link is allocated, + depending on the workload it can be a bottleneck in a sense that threads + have to wait for work request buffers to become available. Before the + introduction of this control the maximal number of work request buffers + available on the send path used to be hard coded to 16. With this control + it becomes configurable. The acceptable range is between 2 and 2048. + + Please be aware that all the buffers need to be allocated as a physically + continuous array in which each element is a single buffer and has the size + of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails we give up much + like before having this control. 
+ + Default: 16 + +smcr_max_recv_wr - INTEGER + So-called work request buffers are SMCR link (and RDMA queue pair) level + resources necessary for performing RDMA operations. Since up to 255 + connections can share a link group and thus also a link and the number + of the work request buffers is decided when the link is allocated, + depending on the workload it can be a bottleneck in a sense that threads + have to wait for work request buffers to become available. Before the + introduction of this control the maximal number of work request buffers + available on the receive path used to be hard coded to 16. With this control + it becomes configurable. The acceptable range is between 2 and 2048. + + Please be aware that all the buffers need to be allocated as a physically + continuous array in which each element is a single buffer and has the size + of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails we give up much + like before having this control. + + Default: 48 diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index fc752a50f91b4..6ceb12baec241 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -24,5 +24,7 @@ struct netns_smc { int sysctl_rmem; int sysctl_max_links_per_lgr; int sysctl_max_conns_per_lgr; + unsigned int sysctl_smcr_max_send_wr; + unsigned int sysctl_smcr_max_recv_wr; }; #endif diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index a5a78cbff3411..8d06c8bb14e9d 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -34,6 +34,8 @@ * distributions may modify it to a value between * 16-255 as needed. 
*/ +#define SMCR_MAX_SEND_WR_DEF 16 /* Default number of work requests per send queue */ +#define SMCR_MAX_RECV_WR_DEF 48 /* Default number of work requests per recv queue */ struct smc_lgr_list { /* list of link group definition */ struct list_head list; @@ -366,6 +368,10 @@ struct smc_link_group { /* max conn can be assigned to lgr */ u8 max_links; /* max links can be added in lgr */ + u16 max_send_wr; + /* number of WR buffers on send */ + u16 max_recv_wr; + /* number of WR buffers on recv */ }; struct { /* SMC-D */ struct smcd_gid peer_gid; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0052f02756ebb..1154907c5c050 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -669,11 +669,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .recv_cq = lnk->smcibdev->roce_cq_recv, .srq = NULL, .cap = { - /* include unsolicited rdma_writes as well, - * there are max. 2 RDMA_WRITE per 1 WR_SEND - */ - .max_send_wr = SMC_WR_BUF_CNT * 3, - .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = lnk->wr_rx_sge_cnt, .max_inline_data = 0, @@ -683,6 +678,11 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) }; int rc; + /* include unsolicited rdma_writes as well, + * there are max. 
2 RDMA_WRITE per 1 WR_SEND + */ + qp_attr.cap.max_send_wr = 3 * lnk->lgr->max_send_wr; + qp_attr.cap.max_recv_wr = lnk->lgr->max_recv_wr; lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr); rc = PTR_ERR_OR_ZERO(lnk->roce_qp); if (IS_ERR(lnk->roce_qp)) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index f865c58c3aa77..f5d5eb6175267 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2157,6 +2157,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_msg_waiter); init_rwsem(&lgr->llc_conf_mutex); lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time); + lgr->max_send_wr = (u16)(READ_ONCE(net->smc.sysctl_smcr_max_send_wr)); + lgr->max_recv_wr = (u16)(READ_ONCE(net->smc.sysctl_smcr_max_recv_wr)); } /* called after lgr was removed from lgr_list */ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 2fab6456f7654..7b2471904d049 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -29,6 +29,8 @@ static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; +static unsigned int smcr_max_wr_min = 2; +static unsigned int smcr_max_wr_max = 2048; static struct ctl_table smc_table[] = { { @@ -99,6 +101,24 @@ static struct ctl_table smc_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "smcr_max_send_wr", + .data = &init_net.smc.sysctl_smcr_max_send_wr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &smcr_max_wr_min, + .extra2 = &smcr_max_wr_max, + }, + { + .procname = "smcr_max_recv_wr", + .data = &init_net.smc.sysctl_smcr_max_recv_wr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &smcr_max_wr_min, + .extra2 = &smcr_max_wr_max, + }, }; int __net_init smc_sysctl_net_init(struct net *net) @@ -130,6 +150,8 @@ 
int __net_init smc_sysctl_net_init(struct net *net) WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; + net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF; + net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF; /* disable handshake limitation by default */ net->smc.limit_smc_hs = 0; diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index eb2465ae1e15e..8538915af7afe 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -25,6 +25,8 @@ static inline int smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; + net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF; + net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF; return 0; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b04a21b8c5111..883fb0f1ce433 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -547,9 +547,9 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) IB_QP_DEST_QPN, &init_attr); - lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT, + lnk->wr_tx_cnt = min_t(size_t, lnk->lgr->max_send_wr, lnk->qp_attr.cap.max_send_wr); - lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3, + lnk->wr_rx_cnt = min_t(size_t, lnk->lgr->max_recv_wr, lnk->qp_attr.cap.max_recv_wr); } @@ -741,50 +741,51 @@ int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) int smc_wr_alloc_link_mem(struct smc_link *link) { /* allocate link related memory */ - link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); + link->wr_tx_bufs = kcalloc(link->lgr->max_send_wr, + SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) goto no_mem; - link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_buflen, + link->wr_rx_bufs = kcalloc(link->lgr->max_recv_wr, link->wr_rx_buflen, GFP_KERNEL); 
if (!link->wr_rx_bufs) goto no_mem_wr_tx_bufs; - link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]), - GFP_KERNEL); + link->wr_tx_ibs = kcalloc(link->lgr->max_send_wr, + sizeof(link->wr_tx_ibs[0]), GFP_KERNEL); if (!link->wr_tx_ibs) goto no_mem_wr_rx_bufs; - link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3, + link->wr_rx_ibs = kcalloc(link->lgr->max_recv_wr, sizeof(link->wr_rx_ibs[0]), GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; - link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_rdmas = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_rdmas[0]), GFP_KERNEL); if (!link->wr_tx_rdmas) goto no_mem_wr_rx_ibs; - link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_rdma_sges = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_rdma_sges[0]), GFP_KERNEL); if (!link->wr_tx_rdma_sges) goto no_mem_wr_tx_rdmas; - link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), + link->wr_tx_sges = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; - link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, + link->wr_rx_sges = kcalloc(link->lgr->max_recv_wr, sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt, GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; - link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL); + link->wr_tx_mask = bitmap_zalloc(link->lgr->max_send_wr, GFP_KERNEL); if (!link->wr_tx_mask) goto no_mem_wr_rx_sges; - link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_pends = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_pends[0]), GFP_KERNEL); if (!link->wr_tx_pends) goto no_mem_wr_tx_mask; - link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT, + link->wr_tx_compl = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_compl[0]), GFP_KERNEL); if (!link->wr_tx_compl) @@ -905,7 +906,7 @@ int smc_wr_create_link(struct smc_link *lnk) goto dma_unmap; } smc_wr_init_sge(lnk); - bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); + 
bitmap_zero(lnk->wr_tx_mask, lnk->lgr->max_send_wr); init_waitqueue_head(&lnk->wr_tx_wait); rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL); if (rc) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f3008dda222a6..aa4533af9122d 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -19,8 +19,6 @@ #include "smc.h" #include "smc_core.h" -#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ - #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ From 8f736087e52f1cd4234b871c137f5a3eb0d2741a Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 27 Oct 2025 23:48:56 +0100 Subject: [PATCH 411/867] net/smc: handle -ENOMEM from smc_wr_alloc_link_mem gracefully Currently a -ENOMEM from smc_wr_alloc_link_mem() is handled by giving up and going the way of a TCP fallback. This was reasonable when the sizes of the allocations there were compile time constants and reasonably small. But now those are actually configurable. So instead of giving up, keep retrying with half of the requested size unless we dip below the old static sizes -- then give up! In terms of numbers that means we give up when it is certain that we at best would end up allocating less than 16 send WR buffers or less than 48 recv WR buffers. This is to avoid regressions due to having fewer buffers compared to the static values of the past. Please note that SMC-R is supposed to be an optimisation over TCP, and falling back to TCP is superior to establishing an SMC connection that is going to perform worse. If the memory allocation fails (and we propagate -ENOMEM), we fall back to TCP. Preserve (modulo truncation) the ratio of send/recv WR buffer counts. 
Signed-off-by: Halil Pasic Reviewed-by: Wenjia Zhang Reviewed-by: Mahanta Jambigi Reviewed-by: Sidraya Jayagond Reviewed-by: Dust Li Tested-by: Mahanta Jambigi Link: https://patch.msgid.link/20251027224856.2970019-3-pasic@linux.ibm.com Signed-off-by: Paolo Abeni --- Documentation/networking/smc-sysctl.rst | 8 ++++-- net/smc/smc_core.c | 34 +++++++++++++++++-------- net/smc/smc_core.h | 2 ++ net/smc/smc_wr.c | 28 ++++++++++---------- 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 337ac2be167e1..904a910f198e4 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -85,7 +85,9 @@ smcr_max_send_wr - INTEGER Please be aware that all the buffers need to be allocated as a physically continuous array in which each element is a single buffer and has the size - of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails we give up much + of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails, we keep retrying + with half of the buffer count until it is ether successful or (unlikely) + we dip below the old hard coded value which is 16 where we give up much like before having this control. Default: 16 @@ -103,7 +105,9 @@ smcr_max_recv_wr - INTEGER Please be aware that all the buffers need to be allocated as a physically continuous array in which each element is a single buffer and has the size - of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails we give up much + of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails, we keep retrying + with half of the buffer count until it is ether successful or (unlikely) + we dip below the old hard coded value which is 16 where we give up much like before having this control. 
Default: 48 diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index be0c2da83d2bf..e4eabc83719e1 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -810,6 +810,8 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->clearing = 0; lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); + lnk->max_send_wr = lgr->max_send_wr; + lnk->max_recv_wr = lgr->max_recv_wr; lnk->lgr = lgr; smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; @@ -836,27 +838,39 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, rc = smc_llc_link_init(lnk); if (rc) goto out; - rc = smc_wr_alloc_link_mem(lnk); - if (rc) - goto clear_llc_lnk; rc = smc_ib_create_protection_domain(lnk); if (rc) - goto free_link_mem; - rc = smc_ib_create_queue_pair(lnk); - if (rc) - goto dealloc_pd; + goto clear_llc_lnk; + do { + rc = smc_ib_create_queue_pair(lnk); + if (rc) + goto dealloc_pd; + rc = smc_wr_alloc_link_mem(lnk); + if (!rc) + break; + else if (rc != -ENOMEM) /* give up */ + goto destroy_qp; + /* retry with smaller ... */ + lnk->max_send_wr /= 2; + lnk->max_recv_wr /= 2; + /* ... 
unless droping below old SMC_WR_BUF_SIZE */ + if (lnk->max_send_wr < 16 || lnk->max_recv_wr < 48) + goto destroy_qp; + smc_ib_destroy_queue_pair(lnk); + } while (1); + rc = smc_wr_create_link(lnk); if (rc) - goto destroy_qp; + goto free_link_mem; lnk->state = SMC_LNK_ACTIVATING; return 0; +free_link_mem: + smc_wr_free_link_mem(lnk); destroy_qp: smc_ib_destroy_queue_pair(lnk); dealloc_pd: smc_ib_dealloc_protection_domain(lnk); -free_link_mem: - smc_wr_free_link_mem(lnk); clear_llc_lnk: smc_llc_link_clear(lnk, false); out: diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8d06c8bb14e9d..5c18f08a4c8a5 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -175,6 +175,8 @@ struct smc_link { struct completion llc_testlink_resp; /* wait for rx of testlink */ int llc_testlink_time; /* testlink interval */ atomic_t conn_cnt; /* connections on this link */ + u16 max_send_wr; + u16 max_recv_wr; }; /* For now we just allow one parallel link per link group. The SMC protocol diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 883fb0f1ce433..5feafa98ab1a1 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -547,9 +547,9 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) IB_QP_DEST_QPN, &init_attr); - lnk->wr_tx_cnt = min_t(size_t, lnk->lgr->max_send_wr, + lnk->wr_tx_cnt = min_t(size_t, lnk->max_send_wr, lnk->qp_attr.cap.max_send_wr); - lnk->wr_rx_cnt = min_t(size_t, lnk->lgr->max_recv_wr, + lnk->wr_rx_cnt = min_t(size_t, lnk->max_recv_wr, lnk->qp_attr.cap.max_recv_wr); } @@ -741,51 +741,51 @@ int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) int smc_wr_alloc_link_mem(struct smc_link *link) { /* allocate link related memory */ - link->wr_tx_bufs = kcalloc(link->lgr->max_send_wr, + link->wr_tx_bufs = kcalloc(link->max_send_wr, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) goto no_mem; - link->wr_rx_bufs = kcalloc(link->lgr->max_recv_wr, link->wr_rx_buflen, + link->wr_rx_bufs = kcalloc(link->max_recv_wr, link->wr_rx_buflen, GFP_KERNEL); if 
(!link->wr_rx_bufs) goto no_mem_wr_tx_bufs; - link->wr_tx_ibs = kcalloc(link->lgr->max_send_wr, + link->wr_tx_ibs = kcalloc(link->max_send_wr, sizeof(link->wr_tx_ibs[0]), GFP_KERNEL); if (!link->wr_tx_ibs) goto no_mem_wr_rx_bufs; - link->wr_rx_ibs = kcalloc(link->lgr->max_recv_wr, + link->wr_rx_ibs = kcalloc(link->max_recv_wr, sizeof(link->wr_rx_ibs[0]), GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; - link->wr_tx_rdmas = kcalloc(link->lgr->max_send_wr, + link->wr_tx_rdmas = kcalloc(link->max_send_wr, sizeof(link->wr_tx_rdmas[0]), GFP_KERNEL); if (!link->wr_tx_rdmas) goto no_mem_wr_rx_ibs; - link->wr_tx_rdma_sges = kcalloc(link->lgr->max_send_wr, + link->wr_tx_rdma_sges = kcalloc(link->max_send_wr, sizeof(link->wr_tx_rdma_sges[0]), GFP_KERNEL); if (!link->wr_tx_rdma_sges) goto no_mem_wr_tx_rdmas; - link->wr_tx_sges = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_sges[0]), + link->wr_tx_sges = kcalloc(link->max_send_wr, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; - link->wr_rx_sges = kcalloc(link->lgr->max_recv_wr, + link->wr_rx_sges = kcalloc(link->max_recv_wr, sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt, GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; - link->wr_tx_mask = bitmap_zalloc(link->lgr->max_send_wr, GFP_KERNEL); + link->wr_tx_mask = bitmap_zalloc(link->max_send_wr, GFP_KERNEL); if (!link->wr_tx_mask) goto no_mem_wr_rx_sges; - link->wr_tx_pends = kcalloc(link->lgr->max_send_wr, + link->wr_tx_pends = kcalloc(link->max_send_wr, sizeof(link->wr_tx_pends[0]), GFP_KERNEL); if (!link->wr_tx_pends) goto no_mem_wr_tx_mask; - link->wr_tx_compl = kcalloc(link->lgr->max_send_wr, + link->wr_tx_compl = kcalloc(link->max_send_wr, sizeof(link->wr_tx_compl[0]), GFP_KERNEL); if (!link->wr_tx_compl) @@ -906,7 +906,7 @@ int smc_wr_create_link(struct smc_link *lnk) goto dma_unmap; } smc_wr_init_sge(lnk); - bitmap_zero(lnk->wr_tx_mask, lnk->lgr->max_send_wr); + bitmap_zero(lnk->wr_tx_mask, 
lnk->max_send_wr); init_waitqueue_head(&lnk->wr_tx_wait); rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL); if (rc) From 2618849f31e7cf51fadd4a5242458501a6d5b315 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 23 Oct 2025 19:44:04 +1030 Subject: [PATCH 412/867] btrfs: ensure no dirty metadata is written back for an fs with errors [BUG] During development of a minor feature (make sure all btrfs_bio::end_io() is called in task context), I noticed a crash in generic/388, where metadata writes triggered new works after btrfs_stop_all_workers(). It turns out that it can even happen without any code modification, just using RAID5 for metadata and the same workload from generic/388 is going to trigger the use-after-free. [CAUSE] If btrfs hits an error, the fs is marked as error, no new transaction is allowed thus metadata is in a frozen state. But there are some metadata modifications before that error, and they are still in the btree inode page cache. Since there will be no real transaction commit, all those dirty folios are just kept as is in the page cache, and they can not be invalidated by invalidate_inode_pages2() call inside close_ctree(), because they are dirty. And finally after btrfs_stop_all_workers(), we call iput() on btree inode, which triggers writeback of those dirty metadata. And if the fs is using RAID56 metadata, this will trigger RMW and queue new works into rmw_workers, which is already stopped, causing warning from queue_work() and use-after-free. [FIX] Add a special handling for write_one_eb(), that if the fs is already in an error state, immediately mark the bbio as failure, instead of really submitting them. Then during close_ctree(), iput() will just discard all those dirty tree blocks without really writing them back, thus no more new jobs for already stopped-and-freed workqueues. The extra discard in write_one_eb() also acts as an extra safenet. E.g. 
the transaction abort is triggered by some extent/free space tree corruptions, and since extent/free space tree is already corrupted some tree blocks may be allocated where they shouldn't be (overwriting existing tree blocks). In that case writing them back will further corrupt the fs. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 755ec6dfd51cb..23273d0e6f224 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2228,6 +2228,14 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, wbc_account_cgroup_owner(wbc, folio, range_len); folio_unlock(folio); } + /* + * If the fs is already in error status, do not submit any writeback + * but immediately finish it. + */ + if (unlikely(BTRFS_FS_ERROR(fs_info))) { + btrfs_bio_end_io(bbio, errno_to_blk_status(BTRFS_FS_ERROR(fs_info))); + return; + } btrfs_submit_bbio(bbio, 0); } From f260c6aff0b8af236084012d14f9f1bf792ea883 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Sun, 26 Oct 2025 01:30:21 +0530 Subject: [PATCH 413/867] btrfs: fix memory leak of qgroup_list in btrfs_add_qgroup_relation When btrfs_add_qgroup_relation() is called with invalid qgroup levels (src >= dst), the function returns -EINVAL directly without freeing the preallocated qgroup_list structure passed by the caller. This causes a memory leak because the caller unconditionally sets the pointer to NULL after the call, preventing any cleanup. The issue occurs because the level validation check happens before the mutex is acquired and before any error handling path that would free the prealloc pointer. On this early return, the cleanup code at the 'out' label (which includes kfree(prealloc)) is never reached. 
In btrfs_ioctl_qgroup_assign(), the code pattern is: prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc); prealloc = NULL; // Always set to NULL regardless of return value ... kfree(prealloc); // This becomes kfree(NULL), does nothing When the level check fails, 'prealloc' is never freed by either the callee or the caller, resulting in a 64-byte memory leak per failed operation. This can be triggered repeatedly by an unprivileged user with access to a writable btrfs mount, potentially exhausting kernel memory. Fix this by freeing prealloc before the early return, ensuring prealloc is always freed on all error paths. Fixes: 4addc1ffd67a ("btrfs: qgroup: preallocate memory before adding a relation") Reviewed-by: Qu Wenruo Signed-off-by: Shardul Bankar Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1175b8192cd7d..31ad8580322a6 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1539,8 +1539,10 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst ASSERT(prealloc); /* Check the level of src and dst first */ - if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) + if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) { + kfree(prealloc); return -EINVAL; + } mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) { From 953902e4fb4c373c81a977f78e40f9f93a79e20f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 24 Oct 2025 12:30:56 +0100 Subject: [PATCH 414/867] btrfs: set inode flag BTRFS_INODE_COPY_EVERYTHING when logging new name If we are logging a new name make sure our inode has the runtime flag BTRFS_INODE_COPY_EVERYTHING set so that at btrfs_log_inode() we will find new inode refs/extrefs in the subvolume tree and copy them into the log tree. We are currently doing it when adding a new link but we are missing it when renaming. 
An example where this makes a new name not persisted: 1) create symlink with name foo in directory A 2) fsync directory A, which persists the symlink 3) rename the symlink from foo to bar 4) fsync directory A to persist the new symlink name Step 4 isn't working correctly as it's not logging the new name and also leaving the old inode ref in the log tree, so after a power failure the symlink still has the old name of "foo". This is because when we first fsync directory A we log the symlink's inode (as it's a new entry) and at btrfs_log_inode() we set the log mode to LOG_INODE_ALL and then because we are using that mode and the inode has the runtime flag BTRFS_INODE_NEEDS_FULL_SYNC set, we clear that flag as well as the flag BTRFS_INODE_COPY_EVERYTHING. That means the next time we log the inode, during the rename through the call to btrfs_log_new_name() (calling btrfs_log_inode_parent() and then btrfs_log_inode()), we will not search the subvolume tree for new refs/extrefs and jump directly to the 'log_extents' label. Fix this by making sure we set BTRFS_INODE_COPY_EVERYTHING on an inode when we are about to log a new name. A test case for fstests will follow soon. 
Reported-by: Vyacheslav Kovalevsky Link: https://lore.kernel.org/linux-btrfs/ac949c74-90c2-4b9a-b7fd-1ffc5c3175c7@gmail.com/ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 - fs/btrfs/tree-log.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02cb081697fea..b95175116ea3e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6873,7 +6873,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inode_inc_iversion(inode); inode_set_ctime_current(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6aad6b65522b2..00a59fb791674 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7910,6 +7910,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + /* The inode has a new name (ref/extref), so make sure we log it. */ + set_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); + btrfs_init_log_ctx(&ctx, inode); ctx.logging_new_name = true; From 3b1a4a59a2086badab391687a6a0b86e03048393 Mon Sep 17 00:00:00 2001 From: austinchang Date: Wed, 29 Oct 2025 09:35:27 +0000 Subject: [PATCH 415/867] btrfs: mark dirty extent range for out of bound prealloc extents In btrfs_fallocate(), when the allocated range overlaps with a prealloc extent and the extent starts after i_size, the range doesn't get marked dirty in file_extent_tree. This results in persisting an incorrect disk_i_size for the inode when not using the no-holes feature. 
This is reproducible since commit 41a2ee75aab0 ("btrfs: introduce per-inode file extent tree"), then became hidden since commit 3d7db6e8bd22 ("btrfs: don't allocate file extent tree for non regular files") and then visible again after commit 8679d2687c35 ("btrfs: initialize inode::file_extent_tree after i_mode has been set"), which fixes the previous commit. The following reproducer triggers the problem: $ cat test.sh MNT=/mnt/test DEV=/dev/vdb mkdir -p $MNT mkfs.btrfs -f -O ^no-holes $DEV mount $DEV $MNT touch $MNT/file1 fallocate -n -o 1M -l 2M $MNT/file1 umount $MNT mount $DEV $MNT len=$((1 * 1024 * 1024)) fallocate -o 1M -l $len $MNT/file1 du --bytes $MNT/file1 umount $MNT mount $DEV $MNT du --bytes $MNT/file1 umount $MNT Running the reproducer gives the following result: $ ./test.sh (...) 2097152 /mnt/test/file1 1048576 /mnt/test/file1 The difference is exactly 1048576 as we assigned. Fix by adding a call to btrfs_inode_set_file_extent_range() in btrfs_fallocate_update_isize(). Fixes: 41a2ee75aab0 ("btrfs: introduce per-inode file extent tree") Signed-off-by: austinchang Reviewed-by: Filipe Manana Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7efd1f8a19121..fa82def46e395 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2854,12 +2854,22 @@ static int btrfs_fallocate_update_isize(struct inode *inode, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + u64 range_start; + u64 range_end; int ret; int ret2; if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) return 0; + range_start = round_down(i_size_read(inode), root->fs_info->sectorsize); + range_end = round_up(end, root->fs_info->sectorsize); + + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), range_start, + range_end - range_start); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) 
return PTR_ERR(trans); From be5febd51c478bc8e24ad3480435f2754a403b14 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 22:34:55 +0530 Subject: [PATCH 416/867] wifi: ath12k: fix potential memory leak in ath12k_wow_arp_ns_offload() When the call to ath12k_wmi_arp_ns_offload() fails, the temporary memory allocation for offload is not freed before returning. Fix that by freeing offload in the error path. Fixes: 1666108c74c4 ("wifi: ath12k: support ARP and NS offload") Signed-off-by: Abdun Nihaal Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251028170457.134608-1-nihaal@cse.iitm.ac.in Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath12k/wow.c b/drivers/net/wireless/ath/ath12k/wow.c index dce9bd0bcaefb..e8481626f1940 100644 --- a/drivers/net/wireless/ath/ath12k/wow.c +++ b/drivers/net/wireless/ath/ath12k/wow.c @@ -758,6 +758,7 @@ static int ath12k_wow_arp_ns_offload(struct ath12k *ar, bool enable) if (ret) { ath12k_warn(ar->ab, "failed to set arp ns offload vdev %i: enable %d, ret %d\n", arvif->vdev_id, enable, ret); + kfree(offload); return ret; } } From 00575bb44b2c2aa53d0a768de2b80c9c1af0174d Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Wed, 29 Oct 2025 10:07:14 +0800 Subject: [PATCH 417/867] wifi: ath12k: fix reusing m3 memory During firmware recovery or suspend/resume, m3 memory could be reused if the size of the new m3 binary is equal to or less than that of the existing memory. There will be issues for the latter case, since m3_mem->size will be updated with a smaller value and this value is eventually used in the free path, where the original total size should be used instead. To fix it, add a new member in m3_mem_region structure to track the original memory size and use it in free path. 
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 05090ae82f44 ("wifi: ath12k: check M3 buffer size as well whey trying to reuse it") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251029-ath12k-fix-m3-reuse-v1-1-69225bacfc5d@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 11 +++++++---- drivers/net/wireless/ath/ath12k/qmi.h | 5 ++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 36325e62aa242..8de9aee2498ec 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #include @@ -3114,9 +3114,10 @@ static void ath12k_qmi_m3_free(struct ath12k_base *ab) if (!m3_mem->vaddr) return; - dma_free_coherent(ab->dev, m3_mem->size, + dma_free_coherent(ab->dev, m3_mem->total_size, m3_mem->vaddr, m3_mem->paddr); m3_mem->vaddr = NULL; + m3_mem->total_size = 0; m3_mem->size = 0; } @@ -3152,7 +3153,7 @@ static int ath12k_qmi_m3_load(struct ath12k_base *ab) /* In recovery/resume cases, M3 buffer is not freed, try to reuse that */ if (m3_mem->vaddr) { - if (m3_mem->size >= m3_len) + if (m3_mem->total_size >= m3_len) goto skip_m3_alloc; /* Old buffer is too small, free and reallocate */ @@ -3164,11 +3165,13 @@ static int ath12k_qmi_m3_load(struct ath12k_base *ab) GFP_KERNEL); if (!m3_mem->vaddr) { ath12k_err(ab, "failed to allocate memory for M3 with size %zu\n", - fw->size); + m3_len); ret = -ENOMEM; goto out; } + m3_mem->total_size = m3_len; + skip_m3_alloc: memcpy(m3_mem->vaddr, m3_data, m3_len); m3_mem->size = m3_len; diff --git a/drivers/net/wireless/ath/ath12k/qmi.h b/drivers/net/wireless/ath/ath12k/qmi.h index 4767d9a2e309e..7a88268aa1e9e 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.h +++ b/drivers/net/wireless/ath/ath12k/qmi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #ifndef ATH12K_QMI_H @@ -120,6 +120,9 @@ struct target_info { }; struct m3_mem_region { + /* total memory allocated */ + u32 total_size; + /* actual memory being used */ u32 size; dma_addr_t paddr; void *vaddr; From 088a099690e4c0d291db505013317ab5dd58b4d5 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 30 Oct 2025 10:08:43 +0800 Subject: [PATCH 418/867] wifi: ath12k: fix error handling in creating hardware group In ath12k_core_init() when ath12k_core_hw_group_create() fails, ath12k_core_hw_group_destroy() is called where for each device below path would get executed ath12k_core_soc_destroy() ath12k_qmi_deinit_service() qmi_handle_release() This results in a kernel crash in case one of the devices fails at qmi_handle_init() when creating hardware group: ath12k_pci 0000:10:00.0: failed to initialize qmi handle ath12k_pci 0000:10:00.0: failed to initialize qmi :-517 ath12k_pci 0000:10:00.0: failed to create soc core: -517 ath12k_pci 0000:10:00.0: unable to create hw group BUG: unable to handle page fault for address: ffffffffffffffb7 RIP: 0010:qmi_handle_release Call Trace: ath12k_qmi_deinit_service ath12k_core_hw_group_destroy ath12k_core_init ath12k_pci_probe The detailed reason is, when qmi_handle_init() fails for a device ab->qmi.handle is not correctly initialized. Then ath12k_core_hw_group_create() returns failure, since error handling is done for all devices, eventually qmi_handle_release() is called for the issue device and finally the kernel crashes due to the uninitialized ab->qmi.handle. Fix this by moving error handling to ath12k_core_hw_group_create(), this way the issue device can be skipped.
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284.1-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 6f245ea0ec6c ("wifi: ath12k: introduce device group abstraction") Link: https://lore.kernel.org/ath12k/fabc97122016d1a66a53ddedd965d134@posteo.net Reported-by: a-development Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220518 Tested-by: a-development Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251030-fix-hw-group-create-err-handling-v1-1-0659e4d15fb9@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 5d494c5cdc0da..a2137b363c2fe 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -2106,14 +2106,27 @@ static int ath12k_core_hw_group_create(struct ath12k_hw_group *ag) ret = ath12k_core_soc_create(ab); if (ret) { mutex_unlock(&ab->core_lock); - ath12k_err(ab, "failed to create soc core: %d\n", ret); - return ret; + ath12k_err(ab, "failed to create soc %d core: %d\n", i, ret); + goto destroy; } mutex_unlock(&ab->core_lock); } return 0; + +destroy: + for (i--; i >= 0; i--) { + ab = ag->ab[i]; + if (!ab) + continue; + + mutex_lock(&ab->core_lock); + ath12k_core_soc_destroy(ab); + mutex_unlock(&ab->core_lock); + } + + return ret; } void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag) @@ -2188,7 +2201,7 @@ int ath12k_core_init(struct ath12k_base *ab) if (ret) { mutex_unlock(&ag->mutex); ath12k_warn(ab, "unable to create hw group\n"); - goto err_destroy_hw_group; + goto err_unassign_hw_group; } } @@ -2196,8 +2209,7 @@ int ath12k_core_init(struct ath12k_base *ab) return 0; -err_destroy_hw_group: - ath12k_core_hw_group_destroy(ab->ag); +err_unassign_hw_group: ath12k_core_hw_group_unassign(ab); err_unregister_notifier: 
ath12k_core_panic_notifier_unregister(ab); From 770bff79424beec2edb8e7cc63b0e8d1b1a927a3 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Thu, 23 Oct 2025 17:19:23 -0700 Subject: [PATCH 419/867] wifi: ath12k: generalize GI and LTF fixed rate functions Currently, functions in mac.c for setting GI and LTF rates are specifically for HE rates. Remove any mention of "HE" in such functions in order to allow for other modes to utilize the functions. The intention is to prepare for the addition of EHT GI and LTF fixed rate settings. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251024001928.257356-2-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 77 ++++++++++++++------------- drivers/net/wireless/ath/ath12k/mac.h | 14 ++++- drivers/net/wireless/ath/ath12k/wmi.h | 12 ++--- 3 files changed, 60 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index e79d457e3c03d..c17500a3e95f9 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -12094,55 +12094,57 @@ ath12k_mac_get_single_legacy_rate(struct ath12k *ar, } static int -ath12k_mac_set_fixed_rate_gi_ltf(struct ath12k_link_vif *arvif, u8 he_gi, u8 he_ltf) +ath12k_mac_set_fixed_rate_gi_ltf(struct ath12k_link_vif *arvif, u8 gi, u8 ltf) { struct ath12k *ar = arvif->ar; - int ret; + int param, ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); /* 0.8 = 0, 1.6 = 2 and 3.2 = 3. 
*/ - if (he_gi && he_gi != 0xFF) - he_gi += 1; + if (gi && gi != 0xFF) + gi += 1; ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - WMI_VDEV_PARAM_SGI, he_gi); + WMI_VDEV_PARAM_SGI, gi); if (ret) { - ath12k_warn(ar->ab, "failed to set HE GI:%d, error:%d\n", - he_gi, ret); + ath12k_warn(ar->ab, "failed to set GI:%d, error:%d\n", + gi, ret); return ret; } /* start from 1 */ - if (he_ltf != 0xFF) - he_ltf += 1; + if (ltf != 0xFF) + ltf += 1; + + param = WMI_VDEV_PARAM_HE_LTF; ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - WMI_VDEV_PARAM_HE_LTF, he_ltf); + param, ltf); if (ret) { - ath12k_warn(ar->ab, "failed to set HE LTF:%d, error:%d\n", - he_ltf, ret); + ath12k_warn(ar->ab, "failed to set LTF:%d, error:%d\n", + ltf, ret); return ret; } return 0; } static int -ath12k_mac_set_auto_rate_gi_ltf(struct ath12k_link_vif *arvif, u16 he_gi, u8 he_ltf) +ath12k_mac_set_auto_rate_gi_ltf(struct ath12k_link_vif *arvif, u16 gi, u8 ltf) { struct ath12k *ar = arvif->ar; int ret; - u32 he_ar_gi_ltf; + u32 ar_gi_ltf; - if (he_gi != 0xFF) { - switch (he_gi) { - case NL80211_RATE_INFO_HE_GI_0_8: - he_gi = WMI_AUTORATE_800NS_GI; + if (gi != 0xFF) { + switch (gi) { + case ATH12K_RATE_INFO_GI_0_8: + gi = WMI_AUTORATE_800NS_GI; break; - case NL80211_RATE_INFO_HE_GI_1_6: - he_gi = WMI_AUTORATE_1600NS_GI; + case ATH12K_RATE_INFO_GI_1_6: + gi = WMI_AUTORATE_1600NS_GI; break; - case NL80211_RATE_INFO_HE_GI_3_2: - he_gi = WMI_AUTORATE_3200NS_GI; + case ATH12K_RATE_INFO_GI_3_2: + gi = WMI_AUTORATE_3200NS_GI; break; default: ath12k_warn(ar->ab, "Invalid GI\n"); @@ -12150,16 +12152,16 @@ ath12k_mac_set_auto_rate_gi_ltf(struct ath12k_link_vif *arvif, u16 he_gi, u8 he_ } } - if (he_ltf != 0xFF) { - switch (he_ltf) { - case NL80211_RATE_INFO_HE_1XLTF: - he_ltf = WMI_HE_AUTORATE_LTF_1X; + if (ltf != 0xFF) { + switch (ltf) { + case ATH12K_RATE_INFO_1XLTF: + ltf = WMI_AUTORATE_LTF_1X; break; - case NL80211_RATE_INFO_HE_2XLTF: - he_ltf = WMI_HE_AUTORATE_LTF_2X; + case 
ATH12K_RATE_INFO_2XLTF: + ltf = WMI_AUTORATE_LTF_2X; break; - case NL80211_RATE_INFO_HE_4XLTF: - he_ltf = WMI_HE_AUTORATE_LTF_4X; + case ATH12K_RATE_INFO_4XLTF: + ltf = WMI_AUTORATE_LTF_4X; break; default: ath12k_warn(ar->ab, "Invalid LTF\n"); @@ -12167,15 +12169,15 @@ ath12k_mac_set_auto_rate_gi_ltf(struct ath12k_link_vif *arvif, u16 he_gi, u8 he_ } } - he_ar_gi_ltf = he_gi | he_ltf; + ar_gi_ltf = gi | ltf; ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, WMI_VDEV_PARAM_AUTORATE_MISC_CFG, - he_ar_gi_ltf); + ar_gi_ltf); if (ret) { ath12k_warn(ar->ab, - "failed to set HE autorate GI:%u, LTF:%u params, error:%d\n", - he_gi, he_ltf, ret); + "failed to set autorate GI:%u, LTF:%u params, error:%d\n", + gi, ltf, ret); return ret; } @@ -12200,10 +12202,10 @@ static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, { struct ieee80211_bss_conf *link_conf; struct ath12k *ar = arvif->ar; + bool he_support, gi_ltf_set = false; u32 vdev_param; u32 param_value; int ret; - bool he_support; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -12257,7 +12259,10 @@ static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, ret = ath12k_mac_set_auto_rate_gi_ltf(arvif, he_gi, he_ltf); if (ret) return ret; - } else { + gi_ltf_set = true; + } + + if (!gi_ltf_set) { vdev_param = WMI_VDEV_PARAM_SGI; param_value = ath12k_mac_nlgi_to_wmigi(sgi); ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index c05af40bd7a20..1f689e367c8a1 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #ifndef ATH12K_MAC_H @@ -84,6 +84,18 @@ enum ath12k_supported_bw { ATH12K_BW_320 = 4, }; +enum ath12k_gi { + ATH12K_RATE_INFO_GI_0_8, + ATH12K_RATE_INFO_GI_1_6, + ATH12K_RATE_INFO_GI_3_2, +}; + +enum ath12k_ltf { + ATH12K_RATE_INFO_1XLTF, + ATH12K_RATE_INFO_2XLTF, + ATH12K_RATE_INFO_4XLTF, +}; + struct ath12k_mac_get_any_chanctx_conf_arg { struct ath12k *ar; struct ieee80211_chanctx_conf *chanctx_conf; diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 911ef9d528177..467fc32feee23 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -223,15 +223,15 @@ enum WMI_HOST_WLAN_BAND { }; /* Parameters used for WMI_VDEV_PARAM_AUTORATE_MISC_CFG command. - * Used only for HE auto rate mode. + * Used for HE and EHT auto rate mode. */ enum { - /* HE LTF related configuration */ - WMI_HE_AUTORATE_LTF_1X = BIT(0), - WMI_HE_AUTORATE_LTF_2X = BIT(1), - WMI_HE_AUTORATE_LTF_4X = BIT(2), + /* LTF related configuration */ + WMI_AUTORATE_LTF_1X = BIT(0), + WMI_AUTORATE_LTF_2X = BIT(1), + WMI_AUTORATE_LTF_4X = BIT(2), - /* HE GI related configuration */ + /* GI related configuration */ WMI_AUTORATE_400NS_GI = BIT(8), WMI_AUTORATE_800NS_GI = BIT(9), WMI_AUTORATE_1600NS_GI = BIT(10), From ec1d9b79be5df30f1998f37a49bf14d34ecd6c50 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Thu, 23 Oct 2025 17:19:24 -0700 Subject: [PATCH 420/867] wifi: ath12k: add EHT rate handling to existing set rate functions Add EHT rate handling to the existing rate functions that validate, prepare and set rates. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251024001928.257356-3-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 215 +++++++++++++++++++++----- 1 file changed, 173 insertions(+), 42 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index c17500a3e95f9..0efdf87153e2f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -533,6 +533,18 @@ ath12k_mac_max_he_nss(const u16 he_mcs_mask[NL80211_HE_NSS_MAX]) return 1; } +static u32 +ath12k_mac_max_eht_nss(const u16 eht_mcs_mask[NL80211_EHT_NSS_MAX]) +{ + int nss; + + for (nss = NL80211_EHT_NSS_MAX - 1; nss >= 0; nss--) + if (eht_mcs_mask[nss]) + return nss + 1; + + return 1; +} + static u8 ath12k_parse_mpdudensity(u8 mpdudensity) { /* From IEEE Std 802.11-2020 defined values for "Minimum MPDU Start Spacing": @@ -3102,37 +3114,50 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, WARN_ON(phymode == MODE_UNKNOWN); } +#define ATH12K_EHT_MCS_7_ENABLED 0x00FF +#define ATH12K_EHT_MCS_9_ENABLED 0x0300 +#define ATH12K_EHT_MCS_11_ENABLED 0x0C00 +#define ATH12K_EHT_MCS_13_ENABLED 0x3000 + static void ath12k_mac_set_eht_mcs(u8 rx_tx_mcs7, u8 rx_tx_mcs9, u8 rx_tx_mcs11, u8 rx_tx_mcs13, - u32 *rx_mcs, u32 *tx_mcs) -{ - *rx_mcs = 0; - u32p_replace_bits(rx_mcs, - u8_get_bits(rx_tx_mcs7, IEEE80211_EHT_MCS_NSS_RX), - WMI_EHT_MCS_NSS_0_7); - u32p_replace_bits(rx_mcs, - u8_get_bits(rx_tx_mcs9, IEEE80211_EHT_MCS_NSS_RX), - WMI_EHT_MCS_NSS_8_9); - u32p_replace_bits(rx_mcs, - u8_get_bits(rx_tx_mcs11, IEEE80211_EHT_MCS_NSS_RX), - WMI_EHT_MCS_NSS_10_11); - u32p_replace_bits(rx_mcs, - u8_get_bits(rx_tx_mcs13, IEEE80211_EHT_MCS_NSS_RX), - WMI_EHT_MCS_NSS_12_13); - - *tx_mcs = 0; - u32p_replace_bits(tx_mcs, - 
u8_get_bits(rx_tx_mcs7, IEEE80211_EHT_MCS_NSS_TX), - WMI_EHT_MCS_NSS_0_7); - u32p_replace_bits(tx_mcs, - u8_get_bits(rx_tx_mcs9, IEEE80211_EHT_MCS_NSS_TX), - WMI_EHT_MCS_NSS_8_9); - u32p_replace_bits(tx_mcs, - u8_get_bits(rx_tx_mcs11, IEEE80211_EHT_MCS_NSS_TX), - WMI_EHT_MCS_NSS_10_11); - u32p_replace_bits(tx_mcs, - u8_get_bits(rx_tx_mcs13, IEEE80211_EHT_MCS_NSS_TX), - WMI_EHT_MCS_NSS_12_13); + u32 *rx_mcs, u32 *tx_mcs, + const u16 eht_mcs_limit[NL80211_EHT_NSS_MAX]) +{ + int nss; + u8 mcs_7 = 0, mcs_9 = 0, mcs_11 = 0, mcs_13 = 0; + u8 peer_mcs_7, peer_mcs_9, peer_mcs_11, peer_mcs_13; + + for (nss = 0; nss < NL80211_EHT_NSS_MAX; nss++) { + if (eht_mcs_limit[nss] & ATH12K_EHT_MCS_7_ENABLED) + mcs_7++; + if (eht_mcs_limit[nss] & ATH12K_EHT_MCS_9_ENABLED) + mcs_9++; + if (eht_mcs_limit[nss] & ATH12K_EHT_MCS_11_ENABLED) + mcs_11++; + if (eht_mcs_limit[nss] & ATH12K_EHT_MCS_13_ENABLED) + mcs_13++; + } + + peer_mcs_7 = u8_get_bits(rx_tx_mcs7, IEEE80211_EHT_MCS_NSS_RX); + peer_mcs_9 = u8_get_bits(rx_tx_mcs9, IEEE80211_EHT_MCS_NSS_RX); + peer_mcs_11 = u8_get_bits(rx_tx_mcs11, IEEE80211_EHT_MCS_NSS_RX); + peer_mcs_13 = u8_get_bits(rx_tx_mcs13, IEEE80211_EHT_MCS_NSS_RX); + + *rx_mcs = u32_encode_bits(min(peer_mcs_7, mcs_7), WMI_EHT_MCS_NSS_0_7) | + u32_encode_bits(min(peer_mcs_9, mcs_9), WMI_EHT_MCS_NSS_8_9) | + u32_encode_bits(min(peer_mcs_11, mcs_11), WMI_EHT_MCS_NSS_10_11) | + u32_encode_bits(min(peer_mcs_13, mcs_13), WMI_EHT_MCS_NSS_12_13); + + peer_mcs_7 = u8_get_bits(rx_tx_mcs7, IEEE80211_EHT_MCS_NSS_TX); + peer_mcs_9 = u8_get_bits(rx_tx_mcs9, IEEE80211_EHT_MCS_NSS_TX); + peer_mcs_11 = u8_get_bits(rx_tx_mcs11, IEEE80211_EHT_MCS_NSS_TX); + peer_mcs_13 = u8_get_bits(rx_tx_mcs13, IEEE80211_EHT_MCS_NSS_TX); + + *tx_mcs = u32_encode_bits(min(peer_mcs_7, mcs_7), WMI_EHT_MCS_NSS_0_7) | + u32_encode_bits(min(peer_mcs_9, mcs_9), WMI_EHT_MCS_NSS_8_9) | + u32_encode_bits(min(peer_mcs_11, mcs_11), WMI_EHT_MCS_NSS_10_11) | + u32_encode_bits(min(peer_mcs_13, mcs_13), 
WMI_EHT_MCS_NSS_12_13); } static void ath12k_mac_set_eht_ppe_threshold(const u8 *ppe_thres, @@ -3171,13 +3196,17 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, struct ath12k_wmi_peer_assoc_arg *arg) { struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); + struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); const struct ieee80211_eht_mcs_nss_supp_20mhz_only *bw_20; const struct ieee80211_eht_mcs_nss_supp_bw *bw; const struct ieee80211_sta_eht_cap *eht_cap; const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_link_sta *link_sta; struct ieee80211_bss_conf *link_conf; + struct cfg80211_chan_def def; + enum nl80211_band band; u32 *rx_mcs, *tx_mcs; + u16 *eht_mcs_mask; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -3199,6 +3228,12 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, if (!he_cap->has_he || !eht_cap->has_eht) return; + if (WARN_ON(ath12k_mac_vif_link_chan(vif, arvif->link_id, &def))) + return; + + band = def.chan->band; + eht_mcs_mask = arvif->bitrate_mask.control[band].eht_mcs; + arg->eht_flag = true; if ((eht_cap->eht_cap_elem.phy_cap_info[5] & @@ -3223,7 +3258,8 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, bw->rx_tx_mcs11_max_nss, bw->rx_tx_mcs13_max_nss, &rx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_320], - &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_320]); + &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_320], + eht_mcs_mask); arg->peer_eht_mcs_count++; fallthrough; case IEEE80211_STA_RX_BW_160: @@ -3233,7 +3269,8 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, bw->rx_tx_mcs11_max_nss, bw->rx_tx_mcs13_max_nss, &rx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_160], - &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_160]); + &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_160], + eht_mcs_mask); arg->peer_eht_mcs_count++; fallthrough; default: @@ -3249,7 +3286,8 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, bw_20->rx_tx_mcs11_max_nss, bw_20->rx_tx_mcs13_max_nss, &rx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_80], - 
&tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_80]); + &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_80], + eht_mcs_mask); } else { bw = &eht_cap->eht_mcs_nss_supp.bw._80; ath12k_mac_set_eht_mcs(bw->rx_tx_mcs9_max_nss, @@ -3257,7 +3295,8 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, bw->rx_tx_mcs11_max_nss, bw->rx_tx_mcs13_max_nss, &rx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_80], - &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_80]); + &tx_mcs[WMI_EHTCAP_TXRX_MCS_NSS_IDX_80], + eht_mcs_mask); } arg->peer_eht_mcs_count++; @@ -3908,6 +3947,8 @@ static void ath12k_mac_init_arvif(struct ath12k_vif *ahvif, sizeof(arvif->bitrate_mask.control[i].vht_mcs)); memset(arvif->bitrate_mask.control[i].he_mcs, 0xff, sizeof(arvif->bitrate_mask.control[i].he_mcs)); + memset(arvif->bitrate_mask.control[i].eht_mcs, 0xff, + sizeof(arvif->bitrate_mask.control[i].eht_mcs)); } /* Handle MLO related assignments */ @@ -5835,6 +5876,20 @@ ath12k_mac_bitrate_mask_num_he_rates(struct ath12k *ar, return num_rates; } +static int +ath12k_mac_bitrate_mask_num_eht_rates(struct ath12k *ar, + enum nl80211_band band, + const struct cfg80211_bitrate_mask *mask) +{ + int num_rates = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(mask->control[band].eht_mcs); i++) + num_rates += hweight16(mask->control[band].eht_mcs[i]); + + return num_rates; +} + static int ath12k_mac_set_peer_vht_fixed_rate(struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta, @@ -11976,6 +12031,9 @@ ath12k_mac_has_single_legacy_rate(struct ath12k *ar, if (ath12k_mac_bitrate_mask_num_he_rates(ar, band, mask)) return false; + if (ath12k_mac_bitrate_mask_num_eht_rates(ar, band, mask)) + return false; + return num_rates == 1; } @@ -11998,11 +12056,15 @@ ath12k_mac_bitrate_mask_get_single_nss(struct ath12k *ar, { struct ieee80211_supported_band *sband = &ar->mac.sbands[band]; u16 vht_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + const struct ieee80211_sband_iftype_data *data; const struct ieee80211_sta_he_cap *he_cap; u16 he_mcs_map = 0; + u16 
eht_mcs_map = 0; u8 ht_nss_mask = 0; u8 vht_nss_mask = 0; u8 he_nss_mask = 0; + u8 eht_nss_mask = 0; + u8 mcs_nss_len; int i; /* No need to consider legacy here. Basic rates are always present @@ -12046,7 +12108,60 @@ ath12k_mac_bitrate_mask_get_single_nss(struct ath12k *ar, return false; } - if (ht_nss_mask != vht_nss_mask || ht_nss_mask != he_nss_mask) + data = ieee80211_get_sband_iftype_data(sband, vif->type); + + mcs_nss_len = ieee80211_eht_mcs_nss_size(&data->he_cap.he_cap_elem, + &data->eht_cap.eht_cap_elem, + false); + if (mcs_nss_len == 4) { + /* 20 MHz only STA case */ + const struct ieee80211_eht_mcs_nss_supp_20mhz_only *eht_mcs_nss = + &data->eht_cap.eht_mcs_nss_supp.only_20mhz; + if (eht_mcs_nss->rx_tx_mcs13_max_nss) + eht_mcs_map = 0x1fff; + else if (eht_mcs_nss->rx_tx_mcs11_max_nss) + eht_mcs_map = 0x07ff; + else if (eht_mcs_nss->rx_tx_mcs9_max_nss) + eht_mcs_map = 0x01ff; + else + eht_mcs_map = 0x007f; + } else { + const struct ieee80211_eht_mcs_nss_supp_bw *eht_mcs_nss; + + switch (mcs_nss_len) { + case 9: + eht_mcs_nss = &data->eht_cap.eht_mcs_nss_supp.bw._320; + break; + case 6: + eht_mcs_nss = &data->eht_cap.eht_mcs_nss_supp.bw._160; + break; + case 3: + eht_mcs_nss = &data->eht_cap.eht_mcs_nss_supp.bw._80; + break; + default: + return false; + } + + if (eht_mcs_nss->rx_tx_mcs13_max_nss) + eht_mcs_map = 0x1fff; + else if (eht_mcs_nss->rx_tx_mcs11_max_nss) + eht_mcs_map = 0x7ff; + else + eht_mcs_map = 0x1ff; + } + + for (i = 0; i < ARRAY_SIZE(mask->control[band].eht_mcs); i++) { + if (mask->control[band].eht_mcs[i] == 0) + continue; + + if (mask->control[band].eht_mcs[i] < eht_mcs_map) + eht_nss_mask |= BIT(i); + else + return false; + } + + if (ht_nss_mask != vht_nss_mask || ht_nss_mask != he_nss_mask || + ht_nss_mask != eht_nss_mask) return false; if (ht_nss_mask == 0) @@ -12198,11 +12313,12 @@ static u32 ath12k_mac_nlgi_to_wmigi(enum nl80211_txrate_gi gi) static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, u32 rate, u8 nss, u8 
sgi, u8 ldpc, - u8 he_gi, u8 he_ltf, bool he_fixed_rate) + u8 he_gi, u8 he_ltf, bool he_fixed_rate, + bool eht_fixed_rate) { struct ieee80211_bss_conf *link_conf; struct ath12k *ar = arvif->ar; - bool he_support, gi_ltf_set = false; + bool he_support, eht_support, gi_ltf_set = false; u32 vdev_param; u32 param_value; int ret; @@ -12214,6 +12330,7 @@ static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, return -EINVAL; he_support = link_conf->he_support; + eht_support = link_conf->eht_support; ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac set rate params vdev %i rate 0x%02x nss 0x%02x sgi 0x%02x ldpc 0x%02x\n", @@ -12223,7 +12340,10 @@ static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, "he_gi 0x%02x he_ltf 0x%02x he_fixed_rate %d\n", he_gi, he_ltf, he_fixed_rate); - if (!he_support) { + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "eht_fixed_rate %d\n", + eht_fixed_rate); + + if (!he_support && !eht_support) { vdev_param = WMI_VDEV_PARAM_FIXED_RATE; ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, vdev_param, rate); @@ -12381,15 +12501,16 @@ ath12k_mac_validate_fixed_rate_settings(struct ath12k *ar, enum nl80211_band ban const struct cfg80211_bitrate_mask *mask, unsigned int link_id) { - bool he_fixed_rate = false, vht_fixed_rate = false; - const u16 *vht_mcs_mask, *he_mcs_mask; + bool eht_fixed_rate = false, he_fixed_rate = false, vht_fixed_rate = false; + const u16 *vht_mcs_mask, *he_mcs_mask, *eht_mcs_mask; struct ieee80211_link_sta *link_sta; struct ath12k_peer *peer, *tmp; - u8 vht_nss, he_nss; + u8 vht_nss, he_nss, eht_nss; int ret = true; vht_mcs_mask = mask->control[band].vht_mcs; he_mcs_mask = mask->control[band].he_mcs; + eht_mcs_mask = mask->control[band].eht_mcs; if (ath12k_mac_bitrate_mask_num_vht_rates(ar, band, mask) == 1) vht_fixed_rate = true; @@ -12397,11 +12518,15 @@ ath12k_mac_validate_fixed_rate_settings(struct ath12k *ar, enum nl80211_band ban if (ath12k_mac_bitrate_mask_num_he_rates(ar, band, mask) == 1) he_fixed_rate = 
true; - if (!vht_fixed_rate && !he_fixed_rate) + if (ath12k_mac_bitrate_mask_num_eht_rates(ar, band, mask) == 1) + eht_fixed_rate = true; + + if (!vht_fixed_rate && !he_fixed_rate && !eht_fixed_rate) return true; vht_nss = ath12k_mac_max_vht_nss(vht_mcs_mask); he_nss = ath12k_mac_max_he_nss(he_mcs_mask); + eht_nss = ath12k_mac_max_eht_nss(eht_mcs_mask); rcu_read_lock(); spin_lock_bh(&ar->ab->base_lock); @@ -12423,6 +12548,11 @@ ath12k_mac_validate_fixed_rate_settings(struct ath12k *ar, enum nl80211_band ban ret = false; goto exit; } + if (eht_fixed_rate && (!link_sta->eht_cap.has_eht || + link_sta->rx_nss < eht_nss)) { + ret = false; + goto exit; + } } } exit: @@ -12454,6 +12584,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, int ret; int num_rates; bool he_fixed_rate = false; + bool eht_fixed_rate = false; lockdep_assert_wiphy(hw->wiphy); @@ -12578,7 +12709,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, } ret = ath12k_mac_set_rate_params(arvif, rate, nss, sgi, ldpc, he_gi, - he_ltf, he_fixed_rate); + he_ltf, he_fixed_rate, eht_fixed_rate); if (ret) { ath12k_warn(ar->ab, "failed to set rate params on vdev %i: %d\n", arvif->vdev_id, ret); From 6c95151e2e776462de6b6fd9e577865e8b32203e Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Thu, 23 Oct 2025 17:19:25 -0700 Subject: [PATCH 421/867] wifi: ath12k: Add EHT MCS/NSS rates to Peer Assoc Add EHT MCS/NSS rate functionality to peer association. As part of ath12k_peer_assoc_h_eht() add the calculation of EHT MCS/NSS using intersection of link_sta and phy capability. ath12k_mac_max_eht_mcs_nss() function is utilized when comparing the max NSS of link STA and phy capability since in split phy case, phy supports max NSS of 2 for 5G band. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Co-developed-by: Vishal Kumar Signed-off-by: Vishal Kumar Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251024001928.257356-4-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 110 ++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 0efdf87153e2f..f1b5755bb85e1 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -545,6 +545,18 @@ ath12k_mac_max_eht_nss(const u16 eht_mcs_mask[NL80211_EHT_NSS_MAX]) return 1; } +static u32 +ath12k_mac_max_eht_mcs_nss(const u8 *eht_mcs, int eht_mcs_set_size) +{ + int i; + u8 nss = 0; + + for (i = 0; i < eht_mcs_set_size; i++) + nss = max(nss, u8_get_bits(eht_mcs[i], IEEE80211_EHT_MCS_NSS_RX)); + + return nss; +} + static u8 ath12k_parse_mpdudensity(u8 mpdudensity) { /* From IEEE Std 802.11-2020 defined values for "Minimum MPDU Start Spacing": @@ -3016,6 +3028,18 @@ static enum wmi_phy_mode ath12k_mac_get_phymode_eht(struct ath12k *ar, return MODE_UNKNOWN; } +static bool +ath12k_peer_assoc_h_eht_masked(const u16 eht_mcs_mask[NL80211_EHT_NSS_MAX]) +{ + int nss; + + for (nss = 0; nss < NL80211_EHT_NSS_MAX; nss++) + if (eht_mcs_mask[nss]) + return false; + + return true; +} + static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta, @@ -3027,6 +3051,7 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; const u16 *he_mcs_mask; + const u16 *eht_mcs_mask; enum wmi_phy_mode phymode = MODE_UNKNOWN; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -3041,6 +3066,7 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, 
ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; he_mcs_mask = arvif->bitrate_mask.control[band].he_mcs; + eht_mcs_mask = arvif->bitrate_mask.control[band].eht_mcs; link_sta = ath12k_mac_get_link_sta(arsta); if (!link_sta) { @@ -3051,7 +3077,8 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, switch (band) { case NL80211_BAND_2GHZ: - if (link_sta->eht_cap.has_eht) { + if (link_sta->eht_cap.has_eht && + !ath12k_peer_assoc_h_eht_masked(eht_mcs_mask)) { if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11BE_EHT40_2G; else @@ -3197,16 +3224,21 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, { struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + const struct ieee80211_eht_mcs_nss_supp *own_eht_mcs_nss_supp; const struct ieee80211_eht_mcs_nss_supp_20mhz_only *bw_20; + const struct ieee80211_sta_eht_cap *eht_cap, *own_eht_cap; + const struct ieee80211_sband_iftype_data *iftd; const struct ieee80211_eht_mcs_nss_supp_bw *bw; - const struct ieee80211_sta_eht_cap *eht_cap; const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_link_sta *link_sta; struct ieee80211_bss_conf *link_conf; struct cfg80211_chan_def def; + bool user_rate_valid = true; enum nl80211_band band; + int eht_nss, nss_idx; u32 *rx_mcs, *tx_mcs; u16 *eht_mcs_mask; + u8 max_nss = 0; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -3234,6 +3266,16 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, band = def.chan->band; eht_mcs_mask = arvif->bitrate_mask.control[band].eht_mcs; + iftd = ieee80211_get_sband_iftype_data(&ar->mac.sbands[band], vif->type); + if (!iftd) { + ath12k_warn(ar->ab, + "unable to access iftype_data in struct ieee80211_supported_band\n"); + return; + } + + own_eht_cap = &iftd->eht_cap; + own_eht_mcs_nss_supp = &own_eht_cap->eht_mcs_nss_supp; + arg->eht_flag = true; if 
((eht_cap->eht_cap_elem.phy_cap_info[5] & @@ -3250,6 +3292,28 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, rx_mcs = arg->peer_eht_rx_mcs_set; tx_mcs = arg->peer_eht_tx_mcs_set; + eht_nss = ath12k_mac_max_eht_mcs_nss((void *)own_eht_mcs_nss_supp, + sizeof(*own_eht_mcs_nss_supp)); + if (eht_nss > link_sta->rx_nss) { + user_rate_valid = false; + for (nss_idx = (link_sta->rx_nss - 1); nss_idx >= 0; nss_idx--) { + if (eht_mcs_mask[nss_idx]) { + user_rate_valid = true; + break; + } + } + } + + if (!user_rate_valid) { + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "Setting eht range MCS value to peer supported nss %d for peer %pM\n", + link_sta->rx_nss, arsta->addr); + eht_mcs_mask[link_sta->rx_nss - 1] = eht_mcs_mask[eht_nss - 1]; + } + + bw_20 = &eht_cap->eht_mcs_nss_supp.only_20mhz; + bw = &eht_cap->eht_mcs_nss_supp.bw._80; + switch (link_sta->bandwidth) { case IEEE80211_STA_RX_BW_320: bw = &eht_cap->eht_mcs_nss_supp.bw._320; @@ -3274,11 +3338,8 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, arg->peer_eht_mcs_count++; fallthrough; default: - if ((he_cap->he_cap_elem.phy_cap_info[0] & - (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G | - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)) == 0) { + if (!(link_sta->he_cap.he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) { bw_20 = &eht_cap->eht_mcs_nss_supp.only_20mhz; ath12k_mac_set_eht_mcs(bw_20->rx_tx_mcs7_max_nss, @@ -3305,6 +3366,41 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, arg->punct_bitmap = ~arvif->punct_bitmap; arg->eht_disable_mcs15 = link_conf->eht_disable_mcs15; + + if (!(link_sta->he_cap.he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) { + if (bw_20->rx_tx_mcs13_max_nss) + max_nss = max(max_nss, u8_get_bits(bw_20->rx_tx_mcs13_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + if 
(bw_20->rx_tx_mcs11_max_nss) + max_nss = max(max_nss, u8_get_bits(bw_20->rx_tx_mcs11_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + if (bw_20->rx_tx_mcs9_max_nss) + max_nss = max(max_nss, u8_get_bits(bw_20->rx_tx_mcs9_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + if (bw_20->rx_tx_mcs7_max_nss) + max_nss = max(max_nss, u8_get_bits(bw_20->rx_tx_mcs7_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + } else { + if (bw->rx_tx_mcs13_max_nss) + max_nss = max(max_nss, u8_get_bits(bw->rx_tx_mcs13_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + if (bw->rx_tx_mcs11_max_nss) + max_nss = max(max_nss, u8_get_bits(bw->rx_tx_mcs11_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + if (bw->rx_tx_mcs9_max_nss) + max_nss = max(max_nss, u8_get_bits(bw->rx_tx_mcs9_max_nss, + IEEE80211_EHT_MCS_NSS_RX)); + } + + max_nss = min(max_nss, (uint8_t)eht_nss); + + arg->peer_nss = min(link_sta->rx_nss, max_nss); + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "mac eht peer %pM nss %d mcs cnt %d ru_punct_bitmap 0x%x\n", + arsta->addr, arg->peer_nss, arg->peer_eht_mcs_count, + arg->punct_bitmap); } static void ath12k_peer_assoc_h_mlo(struct ath12k_link_sta *arsta, From ab31a9b73c95ca8a1b527a0d9fd9192a27acb26f Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Thu, 23 Oct 2025 17:19:26 -0700 Subject: [PATCH 422/867] wifi: ath12k: Add EHT fixed GI/LTF Add EHT functionality to set fixed GI/LTF parameters. 
Add new wmi vdev parameter id for EHT LTF Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251024001928.257356-5-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 50 +++++++++++++++++++++------ drivers/net/wireless/ath/ath12k/wmi.h | 1 + 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index f1b5755bb85e1..ca06807c4e0fc 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -12305,10 +12305,11 @@ ath12k_mac_get_single_legacy_rate(struct ath12k *ar, } static int -ath12k_mac_set_fixed_rate_gi_ltf(struct ath12k_link_vif *arvif, u8 gi, u8 ltf) +ath12k_mac_set_fixed_rate_gi_ltf(struct ath12k_link_vif *arvif, u8 gi, u8 ltf, + u32 param) { struct ath12k *ar = arvif->ar; - int param, ret; + int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -12323,11 +12324,16 @@ ath12k_mac_set_fixed_rate_gi_ltf(struct ath12k_link_vif *arvif, u8 gi, u8 ltf) gi, ret); return ret; } - /* start from 1 */ - if (ltf != 0xFF) - ltf += 1; - param = WMI_VDEV_PARAM_HE_LTF; + if (param == WMI_VDEV_PARAM_HE_LTF) { + /* HE values start from 1 */ + if (ltf != 0xFF) + ltf += 1; + } else { + /* EHT values start from 5 */ + if (ltf != 0xFF) + ltf += 4; + } ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, param, ltf); @@ -12410,6 +12416,7 @@ static u32 ath12k_mac_nlgi_to_wmigi(enum nl80211_txrate_gi gi) static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, u32 rate, u8 nss, u8 sgi, u8 ldpc, u8 he_gi, u8 he_ltf, bool he_fixed_rate, + u8 eht_gi, u8 eht_ltf, bool eht_fixed_rate) { struct ieee80211_bss_conf *link_conf; @@ -12436,8 +12443,9 @@ static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, "he_gi 0x%02x he_ltf 0x%02x he_fixed_rate %d\n", he_gi, he_ltf, 
he_fixed_rate); - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "eht_fixed_rate %d\n", - eht_fixed_rate); + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "eht_gi 0x%02x eht_ltf 0x%02x eht_fixed_rate %d\n", + eht_gi, eht_ltf, eht_fixed_rate); if (!he_support && !eht_support) { vdev_param = WMI_VDEV_PARAM_FIXED_RATE; @@ -12468,9 +12476,26 @@ static int ath12k_mac_set_rate_params(struct ath12k_link_vif *arvif, return ret; } + if (eht_support) { + if (eht_fixed_rate) + ret = ath12k_mac_set_fixed_rate_gi_ltf(arvif, eht_gi, eht_ltf, + WMI_VDEV_PARAM_EHT_LTF); + else + ret = ath12k_mac_set_auto_rate_gi_ltf(arvif, eht_gi, eht_ltf); + + if (ret) { + ath12k_warn(ar->ab, + "failed to set EHT LTF/GI params %d/%d: %d\n", + eht_gi, eht_ltf, ret); + return ret; + } + gi_ltf_set = true; + } + if (he_support) { if (he_fixed_rate) - ret = ath12k_mac_set_fixed_rate_gi_ltf(arvif, he_gi, he_ltf); + ret = ath12k_mac_set_fixed_rate_gi_ltf(arvif, he_gi, he_ltf, + WMI_VDEV_PARAM_HE_LTF); else ret = ath12k_mac_set_auto_rate_gi_ltf(arvif, he_gi, he_ltf); if (ret) @@ -12672,6 +12697,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, const u16 *he_mcs_mask; u8 he_ltf = 0; u8 he_gi = 0; + u8 eht_ltf = 0, eht_gi = 0; u32 rate; u8 nss, mac_nss; u8 sgi; @@ -12707,6 +12733,9 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, he_gi = mask->control[band].he_gi; he_ltf = mask->control[band].he_ltf; + eht_gi = mask->control[band].eht_gi; + eht_ltf = mask->control[band].eht_ltf; + /* mac80211 doesn't support sending a fixed HT/VHT MCS alone, rather it * requires passing at least one of used basic rates along with them. 
* Fixed rate setting across different preambles(legacy, HT, VHT) is @@ -12805,7 +12834,8 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, } ret = ath12k_mac_set_rate_params(arvif, rate, nss, sgi, ldpc, he_gi, - he_ltf, he_fixed_rate, eht_fixed_rate); + he_ltf, he_fixed_rate, eht_gi, eht_ltf, + eht_fixed_rate); if (ret) { ath12k_warn(ar->ab, "failed to set rate params on vdev %i: %d\n", arvif->vdev_id, ret); diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 467fc32feee23..f99fced1610e6 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -1197,6 +1197,7 @@ enum wmi_tlv_vdev_param { WMI_VDEV_PARAM_SET_HEMU_MODE, WMI_VDEV_PARAM_HEOPS_0_31 = 0x8003, WMI_VDEV_PARAM_SET_EHT_MU_MODE = 0x8005, + WMI_VDEV_PARAM_EHT_LTF, }; enum wmi_tlv_peer_flags { From 5ee9cb2c236b45c4d58d6d464a12d985e453576b Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Thu, 23 Oct 2025 17:19:27 -0700 Subject: [PATCH 423/867] wifi: ath12k: add EHT rates to ath12k_mac_op_set_bitrate_mask() Extend ath12k_mac_op_set_bitrate_mask() to handle EHT rates. 
Create and pass EHT mask containing MCS and NSS along with EHT GI and LTF when calling ath12k_mac_set_rate_params() Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Co-developed-by: Aaradhana Sahu Signed-off-by: Aaradhana Sahu Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251024001928.257356-6-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 55 +++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ca06807c4e0fc..fa53a588ceff5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -12568,6 +12568,38 @@ ath12k_mac_he_mcs_range_present(struct ath12k *ar, return true; } +static bool +ath12k_mac_eht_mcs_range_present(struct ath12k *ar, + enum nl80211_band band, + const struct cfg80211_bitrate_mask *mask) +{ + u16 eht_mcs; + int i; + + for (i = 0; i < NL80211_EHT_NSS_MAX; i++) { + eht_mcs = mask->control[band].eht_mcs[i]; + + switch (eht_mcs) { + case 0: + case BIT(8) - 1: + case BIT(10) - 1: + case BIT(12) - 1: + case BIT(14) - 1: + break; + case BIT(15) - 1: + case BIT(16) - 1: + case BIT(16) - BIT(14) - 1: + if (i != 0) + return false; + break; + default: + return false; + } + } + + return true; +} + static void ath12k_mac_set_bitrate_mask_iter(void *data, struct ieee80211_sta *sta) { @@ -12695,6 +12727,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; const u16 *he_mcs_mask; + const u16 *eht_mcs_mask; u8 he_ltf = 0; u8 he_gi = 0; u8 eht_ltf = 0, eht_gi = 0; @@ -12722,6 +12755,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, ht_mcs_mask = mask->control[band].ht_mcs; vht_mcs_mask = mask->control[band].vht_mcs; he_mcs_mask = mask->control[band].he_mcs; + eht_mcs_mask = mask->control[band].eht_mcs; ldpc = 
!!(ar->ht_cap_info & WMI_HT_CAP_LDPC); sgi = mask->control[band].gi; @@ -12773,9 +12807,10 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, ath12k_warn(ar->ab, "failed to update fixed rate settings due to mcs/nss incompatibility\n"); - mac_nss = max3(ath12k_mac_max_ht_nss(ht_mcs_mask), - ath12k_mac_max_vht_nss(vht_mcs_mask), - ath12k_mac_max_he_nss(he_mcs_mask)); + mac_nss = max(max3(ath12k_mac_max_ht_nss(ht_mcs_mask), + ath12k_mac_max_vht_nss(vht_mcs_mask), + ath12k_mac_max_he_nss(he_mcs_mask)), + ath12k_mac_max_eht_nss(eht_mcs_mask)); nss = min_t(u32, ar->num_tx_chains, mac_nss); /* If multiple rates across different preambles are given @@ -12823,6 +12858,20 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, ret = -EINVAL; goto out; } + + num_rates = ath12k_mac_bitrate_mask_num_eht_rates(ar, band, + mask); + if (num_rates == 1) + eht_fixed_rate = true; + + if (!ath12k_mac_eht_mcs_range_present(ar, band, mask) && + num_rates > 1) { + ath12k_warn(ar->ab, + "Setting more than one EHT MCS Value in bitrate mask not supported\n"); + ret = -EINVAL; + goto out; + } + ieee80211_iterate_stations_mtx(hw, ath12k_mac_disable_peer_fixed_rate, arvif); From 09486128caef9efd88794c8aaa1e9ab16b16f383 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Thu, 23 Oct 2025 17:19:28 -0700 Subject: [PATCH 424/867] wifi: ath12k: Set EHT fixed rates for associated STAs Fixed rate is set for STAs that are associated. This will be done during association or with ath12k_sta_rc_update_wk(). Add EHT fixed rate setting for STAs by adding call to ath12k_mac_set_peer_eht_fixed_rate() during the times fixed rate is set for STAs. This new function sets EHT fixed rate for a peer, which sends WMI command with the updated MCS/NSS rate using WMI_PEER_PARAM_FIXED_RATE command id. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Co-developed-by: Aaradhana Sahu Signed-off-by: Aaradhana Sahu Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251024001928.257356-7-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 92 +++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index fa53a588ceff5..29a0b022c32eb 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6086,6 +6086,65 @@ ath12k_mac_set_peer_he_fixed_rate(struct ath12k_link_vif *arvif, return ret; } +static int +ath12k_mac_set_peer_eht_fixed_rate(struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta, + const struct cfg80211_bitrate_mask *mask, + enum nl80211_band band) +{ + struct ath12k_sta *ahsta = arsta->ahsta; + struct ath12k *ar = arvif->ar; + struct ieee80211_sta *sta; + struct ieee80211_link_sta *link_sta; + u8 eht_rate, nss = 0; + u32 rate_code; + int ret, i; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + sta = ath12k_ahsta_to_sta(ahsta); + + for (i = 0; i < ARRAY_SIZE(mask->control[band].eht_mcs); i++) { + if (hweight16(mask->control[band].eht_mcs[i]) == 1) { + nss = i + 1; + eht_rate = ffs(mask->control[band].eht_mcs[i]) - 1; + } + } + + if (!nss) { + ath12k_warn(ar->ab, "No single EHT Fixed rate found to set for %pM\n", + arsta->addr); + return -EINVAL; + } + + /* Avoid updating invalid nss as fixed rate*/ + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta || nss > link_sta->rx_nss) { + ath12k_warn(ar->ab, + "unable to access link sta for sta %pM link %u or fixed nss of %u is not supported by sta\n", + sta->addr, arsta->link_id, nss); + return -EINVAL; + } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "Setting Fixed EHT Rate for peer %pM. 
Device will not switch to any other selected rates\n", + arsta->addr); + + rate_code = ATH12K_HW_RATE_CODE(eht_rate, nss - 1, + WMI_RATE_PREAMBLE_EHT); + + ret = ath12k_wmi_set_peer_param(ar, arsta->addr, + arvif->vdev_id, + WMI_PEER_PARAM_FIXED_RATE, + rate_code); + if (ret) + ath12k_warn(ar->ab, + "failed to update STA %pM Fixed Rate %d: %d\n", + arsta->addr, rate_code, ret); + + return ret; +} + static int ath12k_mac_station_assoc(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta, @@ -6098,7 +6157,7 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, struct cfg80211_chan_def def; enum nl80211_band band; struct cfg80211_bitrate_mask *mask; - u8 num_vht_rates, num_he_rates; + u8 num_vht_rates, num_he_rates, num_eht_rates; u8 link_id = arvif->link_id; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -6141,10 +6200,11 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, num_vht_rates = ath12k_mac_bitrate_mask_num_vht_rates(ar, band, mask); num_he_rates = ath12k_mac_bitrate_mask_num_he_rates(ar, band, mask); + num_eht_rates = ath12k_mac_bitrate_mask_num_eht_rates(ar, band, mask); - /* If single VHT/HE rate is configured (by set_bitrate_mask()), - * peer_assoc will disable VHT/HE. This is now enabled by a peer specific - * fixed param. + /* If single VHT/HE/EHT rate is configured (by set_bitrate_mask()), + * peer_assoc will disable VHT/HE/EHT. This is now enabled by a peer + * specific fixed param. * Note that all other rates and NSS will be disabled for this peer. */ link_sta = ath12k_mac_get_link_sta(arsta); @@ -6164,6 +6224,10 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, ret = ath12k_mac_set_peer_he_fixed_rate(arvif, arsta, mask, band); if (ret) return ret; + } else if (link_sta->eht_cap.has_eht && num_eht_rates == 1) { + ret = ath12k_mac_set_peer_eht_fixed_rate(arvif, arsta, mask, band); + if (ret) + return ret; } /* Re-assoc is run only to update supported rates for given station. 
It @@ -6226,8 +6290,9 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; const u16 *he_mcs_mask; + const u16 *eht_mcs_mask; u32 changed, bw, nss, mac_nss, smps, bw_prev; - int err, num_vht_rates, num_he_rates; + int err, num_vht_rates, num_he_rates, num_eht_rates; const struct cfg80211_bitrate_mask *mask; enum wmi_phy_mode peer_phymode; struct ath12k_link_sta *arsta; @@ -6248,6 +6313,7 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; he_mcs_mask = arvif->bitrate_mask.control[band].he_mcs; + eht_mcs_mask = arvif->bitrate_mask.control[band].eht_mcs; spin_lock_bh(&ar->data_lock); @@ -6265,6 +6331,7 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) mac_nss = max3(ath12k_mac_max_ht_nss(ht_mcs_mask), ath12k_mac_max_vht_nss(vht_mcs_mask), ath12k_mac_max_he_nss(he_mcs_mask)); + mac_nss = max(mac_nss, ath12k_mac_max_eht_nss(eht_mcs_mask)); nss = min(nss, mac_nss); struct ath12k_wmi_peer_assoc_arg *peer_arg __free(kfree) = @@ -6350,6 +6417,8 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) mask); num_he_rates = ath12k_mac_bitrate_mask_num_he_rates(ar, band, mask); + num_eht_rates = ath12k_mac_bitrate_mask_num_eht_rates(ar, band, + mask); /* Peer_assoc_prepare will reject vht rates in * bitrate_mask if its not available in range format and @@ -6374,9 +6443,18 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) band); } else if (link_sta->he_cap.has_he && num_he_rates == 1) { ath12k_mac_set_peer_he_fixed_rate(arvif, arsta, mask, band); + } else if (link_sta->eht_cap.has_eht && num_eht_rates == 1) { + err = ath12k_mac_set_peer_eht_fixed_rate(arvif, arsta, + mask, band); + if (err) { + ath12k_warn(ar->ab, + "failed to set peer EHT fixed rate for STA %pM ret 
%d\n", + arsta->addr, err); + return; + } } else { - /* If the peer is non-VHT/HE or no fixed VHT/HE rate - * is provided in the new bitrate mask we set the + /* If the peer is non-VHT/HE/EHT or no fixed VHT/HE/EHT + * rate is provided in the new bitrate mask we set the * other rates using peer_assoc command. Also clear * the peer fixed rate settings as it has higher proprity * than peer assoc From 448bf7b51426bcca54b5ac1ddd1045a36c9d1dea Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Sun, 26 Oct 2025 23:52:53 +0530 Subject: [PATCH 425/867] wifi: ath12k: enforce vdev limit in ath12k_mac_vdev_create() Currently, vdev limit check is performed only in ath12k_mac_assign_vif_to_vdev(). If the host has already created maximum number of vdevs for the radio (ar) and a scan request arrives for the same radio, ath12k_mac_initiate_hw_scan() attempts to create a vdev without checking the limit, causing firmware asserts. Centralize the vdev limit guard by moving the check into ath12k_mac_vdev_create() so that all callers obey the limit. While doing this, update the condition from `num_created_vdevs > (TARGET_NUM_VDEVS(ab) - 1)` to `num_created_vdevs >= TARGET_NUM_VDEVS(ab)` for clarity and to eliminate unnecessary arithmetic. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: 0d6e6736ed9f ("wifi: ath12k: scan statemachine changes for single wiphy") Fixes: 4938ba733ee2 ("wifi: ath12k: modify remain on channel for single wiphy") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251026182254.1399650-2-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 29a0b022c32eb..8f0d914d7fc3a 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10031,6 +10031,12 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) if (vif->type == NL80211_IFTYPE_MONITOR && ar->monitor_vdev_created) return -EINVAL; + if (ar->num_created_vdevs >= TARGET_NUM_VDEVS(ab)) { + ath12k_warn(ab, "failed to create vdev, reached max vdev limit %d\n", + TARGET_NUM_VDEVS(ab)); + return -ENOSPC; + } + link_id = arvif->link_id; if (link_id < IEEE80211_MLD_MAX_NUM_LINKS) { @@ -10390,12 +10396,6 @@ static struct ath12k *ath12k_mac_assign_vif_to_vdev(struct ieee80211_hw *hw, if (arvif->is_created) goto flush; - if (ar->num_created_vdevs > (TARGET_NUM_VDEVS(ab) - 1)) { - ath12k_warn(ab, "failed to create vdev, reached max vdev limit %d\n", - TARGET_NUM_VDEVS(ab)); - goto unlock; - } - ret = ath12k_mac_vdev_create(ar, arvif); if (ret) { ath12k_warn(ab, "failed to create vdev %pM ret %d", vif->addr, ret); From e70515039d44be61b6a73aafb401d141b0034d12 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Sun, 26 Oct 2025 23:52:54 +0530 Subject: [PATCH 426/867] wifi: ath12k: unassign arvif on scan vdev create failure During scan and remain-on-channel requests, a scan link vif (arvif) is assigned and a temporary vdev is created. 
If vdev creation fails, the assigned arvif is left attached until the virtual interface is removed, leaving a stale link in ahvif. Fix this by freeing the stale arvif and resetting the corresponding link in ahvif by calling ath12k_mac_unassign_link_vif() when vdev creation fails. While at it, propagate the actual error code from ath12k_mac_vdev_create() instead of returning -EINVAL in ath12k_mac_initiate_hw_scan(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: 477cabfdb776 ("wifi: ath12k: modify link arvif creation and removal for MLO") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251026182254.1399650-3-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8f0d914d7fc3a..7639fb2c52fe2 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5313,7 +5313,8 @@ static int ath12k_mac_initiate_hw_scan(struct ieee80211_hw *hw, ret = ath12k_mac_vdev_create(ar, arvif); if (ret) { ath12k_warn(ar->ab, "unable to create scan vdev %d\n", ret); - return -EINVAL; + ath12k_mac_unassign_link_vif(arvif); + return ret; } } @@ -13411,6 +13412,7 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, if (ret) { ath12k_warn(ar->ab, "unable to create scan vdev for roc: %d\n", ret); + ath12k_mac_unassign_link_vif(arvif); return ret; } } From 877f9c22fdf424c657de757bfe8543cf77461324 Mon Sep 17 00:00:00 2001 From: Zhongqiu Han Date: Thu, 30 Oct 2025 21:00:22 +0800 Subject: [PATCH 427/867] wifi: ath10k: use = {} to initialize pm_qos_request instead of memset Initialize the pm_qos_request structure using = {} instead of memset() in ath10k_download_fw(). 
This ensures the structure is properly zeroed before passing it to cpu_latency_qos_add_request(), and improves efficiency by avoiding an explicit runtime memset. Signed-off-by: Zhongqiu Han Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251030130023.1836808-2-zhongqiu.han@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 9ae3595fb6986..670c31a52a12e 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1186,7 +1186,7 @@ static int ath10k_download_fw(struct ath10k *ar) u32 address, data_len; const void *data; int ret; - struct pm_qos_request latency_qos; + struct pm_qos_request latency_qos = {}; address = ar->hw_params.patch_load_addr; @@ -1220,7 +1220,6 @@ static int ath10k_download_fw(struct ath10k *ar) ret); } - memset(&latency_qos, 0, sizeof(latency_qos)); cpu_latency_qos_add_request(&latency_qos, 0); ret = ath10k_bmi_fast_download(ar, address, data, data_len); From 059ca8fd692b67a77fb89e9d4e8f57cf08e32b08 Mon Sep 17 00:00:00 2001 From: Zhongqiu Han Date: Thu, 30 Oct 2025 21:00:23 +0800 Subject: [PATCH 428/867] wifi: ath10k: use = {} to initialize bmi_target_info instead of memset Initialize the bmi_target_info structure using = {} at declaration time instead of calling memset() in each bus-specific code path. This simplifies the code and avoids an explicit memset. 
Signed-off-by: Zhongqiu Han Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251030130023.1836808-3-zhongqiu.han@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 670c31a52a12e..7c2939cbde5f0 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -3352,7 +3352,7 @@ EXPORT_SYMBOL(ath10k_core_stop); */ static int ath10k_core_probe_fw(struct ath10k *ar) { - struct bmi_target_info target_info; + struct bmi_target_info target_info = {}; int ret = 0; ret = ath10k_hif_power_up(ar, ATH10K_FIRMWARE_MODE_NORMAL); @@ -3363,7 +3363,6 @@ static int ath10k_core_probe_fw(struct ath10k *ar) switch (ar->hif.bus) { case ATH10K_BUS_SDIO: - memset(&target_info, 0, sizeof(target_info)); ret = ath10k_bmi_get_target_info_sdio(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); @@ -3375,7 +3374,6 @@ static int ath10k_core_probe_fw(struct ath10k *ar) case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_USB: - memset(&target_info, 0, sizeof(target_info)); ret = ath10k_bmi_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); @@ -3385,7 +3383,6 @@ static int ath10k_core_probe_fw(struct ath10k *ar) ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_SNOC: - memset(&target_info, 0, sizeof(target_info)); ret = ath10k_hif_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); From ecca75ae5ae66d13f701c4a6523644d51946ddec Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Oct 2025 09:49:30 -0700 Subject: [PATCH 429/867] selftests: drv-net: replace the nsim ring test with a drv-net one We are trying to move away from netdevsim-only tests and towards tests which can be run both 
against netdevsim and real drivers. Replace the simple bash script we have for checking ethtool -g/-G on netdevsim with a Python test tweaking those params as well as channel count. The new test is not exactly equivalent to the netdevsim one, but real drivers don't often support random ring sizes, let alone modifying max values via debugfs. Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251029164930.2923448-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/netdevsim/Makefile | 1 - .../drivers/net/netdevsim/ethtool-ring.sh | 85 --------- .../selftests/drivers/net/ring_reconfig.py | 167 ++++++++++++++++++ 4 files changed, 168 insertions(+), 86 deletions(-) delete mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh create mode 100755 tools/testing/selftests/drivers/net/ring_reconfig.py diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 6e41635bd55a4..68e0bb603a9d6 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS := \ ping.py \ psp.py \ queues.py \ + ring_reconfig.py \ shaper.py \ stats.py \ xdp.py \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c8272..833abd8e6fdcc 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -8,7 +8,6 @@ TEST_PROGS := \ ethtool-features.sh \ ethtool-fec.sh \ ethtool-pause.sh \ - ethtool-ring.sh \ fib.sh \ fib_notifications.sh \ hw_stats_l3.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh deleted file mode 100755 index c969559ffa7a1..0000000000000 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh +++ 
/dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -source ethtool-common.sh - -function get_value { - local query="${SETTINGS_MAP[$1]}" - - echo $(ethtool -g $NSIM_NETDEV | \ - tail -n +$CURR_SETT_LINE | \ - awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}') -} - -function update_current_settings { - for key in ${!SETTINGS_MAP[@]}; do - CURRENT_SETTINGS[$key]=$(get_value $key) - done - echo ${CURRENT_SETTINGS[@]} -} - -if ! ethtool -h | grep -q set-ring >/dev/null; then - echo "SKIP: No --set-ring support in ethtool" - exit 4 -fi - -NSIM_NETDEV=$(make_netdev) - -set -o pipefail - -declare -A SETTINGS_MAP=( - ["rx"]="RX" - ["rx-mini"]="RX Mini" - ["rx-jumbo"]="RX Jumbo" - ["tx"]="TX" -) - -declare -A EXPECTED_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -declare -A CURRENT_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -MAX_VALUE=$((RANDOM % $((2**32-1)))) -RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/) - -for ring_max_entry in $RING_MAX_LIST; do - echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry -done - -CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:) - -# populate the expected settings map -for key in ${!SETTINGS_MAP[@]}; do - EXPECTED_SETTINGS[$key]=$(get_value $key) -done - -# test -for key in ${!SETTINGS_MAP[@]}; do - value=$((RANDOM % $MAX_VALUE)) - - ethtool -G $NSIM_NETDEV "$key" "$value" - - EXPECTED_SETTINGS[$key]="$value" - expected=${EXPECTED_SETTINGS[@]} - current=$(update_current_settings) - - check $? 
"$current" "$expected" - set +x -done - -if [ $num_errors -eq 0 ]; then - echo "PASSED all $((num_passes)) checks" - exit 0 -else - echo "FAILED $num_errors/$((num_errors+num_passes)) checks" - exit 1 -fi diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py new file mode 100755 index 0000000000000..f9530a8b08567 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ring_reconfig.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test channel and ring size configuration via ethtool (-L / -G). +""" + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq +from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic +from lib.py import defer, NlError + + +def channels(cfg) -> None: + """ + Twiddle channel counts in various combinations of parameters. + We're only looking for driver adhering to the requested config + if the config is accepted and crashes. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx", "tx", "combined"] + mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"}, + {"rx", "tx", "combined"},] + + # Get the set of keys that device actually supports + restore = {} + supported = set() + for key in all_keys: + if key + "-max" in chans: + supported.add(key) + restore |= {key + "-count": chans[key + "-count"]} + + defer(cfg.eth.channels_set, ehdr | restore) + + def test_config(config): + try: + cfg.eth.channels_set(ehdr | config) + get = cfg.eth.channels_get(ehdr) + for k, v in config.items(): + ksft_eq(get.get(k, 0), v) + except NlError as e: + failed.append(mix) + ksft_pr("Can't set", config, e) + else: + ksft_pr("Okay", config) + + failed = [] + for mix in mixes: + if not mix.issubset(supported): + continue + + # Set all the values in the mix to 1, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = 1 if key in 
mix else 0 + test_config(config) + + for mix in mixes: + if not mix.issubset(supported): + continue + if mix in failed: + continue + + # Set all the values in the mix to max, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = chans[key + '-max'] if key in mix else 0 + test_config(config) + + +def _configure_min_ring_cnt(cfg) -> None: + """ Try to configure a single Rx/Tx ring. """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx-count", "tx-count", "combined-count"] + restore = {} + config = {} + for key in all_keys: + if key in chans: + restore[key] = chans[key] + config[key] = 0 + + if chans.get('combined-count', 0) > 1: + config['combined-count'] = 1 + elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1: + config['tx-count'] = 1 + config['rx-count'] = 1 + else: + # looks like we're already on 1 channel + return + + cfg.eth.channels_set(ehdr | config) + defer(cfg.eth.channels_set, ehdr | restore) + + +def ringparam(cfg) -> None: + """ + Tweak the ringparam configuration. Try to run some traffic over min + ring size to make sure it actually functions. 
+ """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + rings = cfg.eth.rings_get(ehdr) + + restore = {} + maxes = {} + params = set() + for key in rings.keys(): + if 'max' in key: + param = key[:-4] + maxes[param] = rings[key] + params.add(param) + restore[param] = rings[param] + + defer(cfg.eth.rings_set, ehdr | restore) + + # Speed up the reconfig by configuring just one ring + _configure_min_ring_cnt(cfg) + + # Try to reach min on all settings + for param in params: + val = rings[param] + while True: + try: + cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex}, + param: val // 2}) + if val == 0: + break + val //= 2 + except NlError: + break + + get = cfg.eth.rings_get(ehdr) + ksft_eq(get[param], val) + + ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})") + + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + # Try max across all params, if the driver supports large rings + # this may OOM so we ignore errors + try: + ksft_pr("Applying max settings") + config = {p: maxes[p] for p in params} + cfg.eth.rings_set(ehdr | config) + except NlError as e: + ksft_pr("Can't set max params", config, e) + else: + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.eth = EthtoolFamily() + + ksft_run([channels, + ringparam], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() From 0d0eb186421d0886ac466008235f6d9eedaf918e Mon Sep 17 00:00:00 2001 From: Harshita V Rajput Date: Tue, 28 Oct 2025 13:22:55 +0530 Subject: [PATCH 430/867] cxgb4: flower: add support for fragmentation This patch adds support for matching fragmented packets in tc flower filters. Previously, commit 93a8540aac72 ("cxgb4: flower: validate control flags") added a check using flow_rule_match_has_control_flags() to reject any rules with control flags, as the driver did not support fragmentation at that time. 
Now, with this patch, support for FLOW_DIS_IS_FRAGMENT is added: - The driver checks for control flags using flow_rule_is_supp_control_flags(), as recommended in commit d11e63119432 ("flow_offload: add control flag checking helpers"). - If the fragmentation flag is present, the driver sets `fs->val.frag` and `fs->mask.frag` accordingly in the filter specification. Since fragmentation is now supported, the earlier check that rejected all control flags (flow_rule_match_has_control_flags()) has been removed. Signed-off-by: Harshita V Rajput Signed-off-by: Potnuri Bharat Teja Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251028075255.1391596-1-harshitha.vr@chelsio.com Signed-off-by: Jakub Kicinski --- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 0765d000eaef0..e2b5554531b57 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -161,20 +161,9 @@ static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, static void cxgb4_process_flow_match(struct net_device *dev, struct flow_rule *rule, + u16 addr_type, struct ch_filter_specification *fs) { - u16 addr_type = 0; - - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_control(rule, &match); - addr_type = match.key->addr_type; - } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { - addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { - addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -327,9 +316,6 @@ static int cxgb4_validate_flow_match(struct netlink_ext_ack *extack, return -EOPNOTSUPP; } - 
if (flow_rule_match_has_control_flags(rule, extack)) - return -EOPNOTSUPP; - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -858,6 +844,7 @@ int cxgb4_flow_rule_replace(struct net_device *dev, struct flow_rule *rule, { struct adapter *adap = netdev2adap(dev); struct filter_ctx ctx; + u16 addr_type = 0; u8 inet_family; int fidx, ret; @@ -867,7 +854,28 @@ int cxgb4_flow_rule_replace(struct net_device *dev, struct flow_rule *rule, if (cxgb4_validate_flow_match(extack, rule)) return -EOPNOTSUPP; - cxgb4_process_flow_match(dev, rule, fs); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { + fs->val.frag = match.key->flags & FLOW_DIS_IS_FRAGMENT; + fs->mask.frag = true; + } + + if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT, + match.mask->flags, extack)) + return -EOPNOTSUPP; + + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + + cxgb4_process_flow_match(dev, rule, addr_type, fs); cxgb4_process_flow_actions(dev, &rule->action, fs); fs->hash = is_filter_exact_match(adap, fs); From 5c5f1f64681cc889d9b13e4a61285e9e029d6ab5 Mon Sep 17 00:00:00 2001 From: Raphael Pinsonneault-Thibeault Date: Fri, 24 Oct 2025 12:29:10 -0400 Subject: [PATCH 431/867] Bluetooth: hci_event: validate skb length for unknown CC opcode In hci_cmd_complete_evt(), if the command complete event has an unknown opcode, we assume the first byte of the remaining skb->data contains the return status. However, parameter data has previously been pulled in hci_event_func(), which may leave the skb empty. If so, using skb->data[0] for the return status uses un-init memory. 
The fix is to check skb->len before using skb->data. Reported-by: syzbot+a9a4bedfca6aa9d7fa24@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a9a4bedfca6aa9d7fa24 Tested-by: syzbot+a9a4bedfca6aa9d7fa24@syzkaller.appspotmail.com Fixes: afcb3369f46ed ("Bluetooth: hci_event: Fix vendor (unknown) opcode status handling") Signed-off-by: Raphael Pinsonneault-Thibeault Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d37db364acf74..f20c826509b6f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4218,6 +4218,13 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } if (i == ARRAY_SIZE(hci_cc_table)) { + if (!skb->len) { + bt_dev_err(hdev, "Unexpected cc 0x%4.4x with no status", + *opcode); + *status = HCI_ERROR_UNSPECIFIED; + return; + } + /* Unknown opcode, assume byte 0 contains the status, so * that e.g. __hci_cmd_sync() properly returns errors * for vendor specific commands send by HCI drivers. From 1c21cf89a66413eb04b2d22c955b7a50edc14dfa Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 23:26:30 +0530 Subject: [PATCH 432/867] Bluetooth: btrtl: Fix memory leak in rtlbt_parse_firmware_v2() The memory allocated for ptr using kvmalloc() is not freed on the last error path. Fix that by freeing it on that error path. 
Fixes: 9a24ce5e29b1 ("Bluetooth: btrtl: Firmware format v2 support") Signed-off-by: Abdun Nihaal Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 6abd962502e36..1d4a7887abccf 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -625,8 +625,10 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, len += entry->len; } - if (!len) + if (!len) { + kvfree(ptr); return -EPERM; + } *_buf = ptr; return len; From 8d59fba49362c65332395789fd82771f1028d87e Mon Sep 17 00:00:00 2001 From: Ilia Gavrilov Date: Mon, 20 Oct 2025 15:12:55 +0000 Subject: [PATCH 433/867] Bluetooth: MGMT: Fix OOB access in parse_adv_monitor_pattern() In the parse_adv_monitor_pattern() function, the value of the 'length' variable is currently limited to HCI_MAX_EXT_AD_LENGTH(251). The size of the 'value' array in the mgmt_adv_pattern structure is 31. If the value of 'patterns[i].length' is set by userspace and exceeds 31, the 'patterns[i].value' array can be accessed out of bounds when copied. Increasing the size of the 'value' array in the 'mgmt_adv_pattern' structure will break userspace. Considering this, and to avoid OOB access, revert the limits for 'offset' and 'length' back to the value of HCI_MAX_AD_LENGTH. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: db08722fc7d4 ("Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index bca0333f1e991..f5be96f08b9d9 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -780,7 +780,7 @@ struct mgmt_adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[31]; + __u8 value[HCI_MAX_AD_LENGTH]; } __packed; #define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 24e335e3a7271..79762bfaea5ff 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5395,9 +5395,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_EXT_AD_LENGTH || - length > HCI_MAX_EXT_AD_LENGTH || - (offset + length) > HCI_MAX_EXT_AD_LENGTH) + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); From 6ab753b5d8e521616cd9bd10b09891cbeb7e0235 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Wed, 29 Oct 2025 11:45:39 -0700 Subject: [PATCH 434/867] gve: Implement gettimex64 with -EOPNOTSUPP gve implemented a ptp_clock for sole use of do_aux_work at this time. ptp_clock_gettime() and ptp_sys_offset() assume every ptp_clock has implemented either gettimex64 or gettime64. Stub gettimex64 and return -EOPNOTSUPP to prevent NULL dereferencing. 
Fixes: acd16380523b ("gve: Add initial PTP device support") Reported-by: syzbot+c8c0e7ccabd456541612@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c8c0e7ccabd456541612 Signed-off-by: Tim Hostetler Reviewed-by: Harshitha Ramamurthy Reviewed-by: Kuniyuki Iwashima Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251029184555.3852952-2-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ptp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index e96247c9d68d2..19ae699d4b18d 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -26,6 +26,13 @@ int gve_clock_nic_ts_read(struct gve_priv *priv) return 0; } +static int gve_ptp_gettimex64(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -47,6 +54,7 @@ static long gve_ptp_do_aux_work(struct ptp_clock_info *info) static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", + .gettimex64 = gve_ptp_gettimex64, .do_aux_work = gve_ptp_do_aux_work, }; From 329d050bbe63c2999f657cf2d3855be11a473745 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Wed, 29 Oct 2025 11:45:40 -0700 Subject: [PATCH 435/867] gve: Implement settime64 with -EOPNOTSUPP ptp_clock_settime() assumes every ptp_clock has implemented settime64(). Stub it with -EOPNOTSUPP to prevent a NULL dereference. 
Fixes: acd16380523b ("gve: Add initial PTP device support") Reported-by: syzbot+a546141ca6d53b90aba3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a546141ca6d53b90aba3 Signed-off-by: Tim Hostetler Reviewed-by: Kuniyuki Iwashima Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251029184555.3852952-3-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index 19ae699d4b18d..a384a9ed4914e 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -33,6 +33,12 @@ static int gve_ptp_gettimex64(struct ptp_clock_info *info, return -EOPNOTSUPP; } +static int gve_ptp_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -55,6 +61,7 @@ static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", .gettimex64 = gve_ptp_gettimex64, + .settime64 = gve_ptp_settime64, .do_aux_work = gve_ptp_do_aux_work, }; From 54133f9b4b53ffa2204eb27cfc9d50072c9a52d2 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 29 Oct 2025 13:43:10 -0700 Subject: [PATCH 436/867] net: mana: Support HW link state events Handle the NIC hardware link state events received from the HW channel, then set the proper link state accordingly. And, add a feature bit, GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE, to inform the NIC hardware this handler exists. Our MANA NIC only sends out the link state down/up messages when we need to let the VM rerun DHCP client and change IP address. So, add netif_carrier_on() in the probe(), let the NIC show the right initial state in /sys/class/net/ethX/operstate. 
Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1761770601-16920-1-git-send-email-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microsoft/mana/gdma_main.c | 1 + .../net/ethernet/microsoft/mana/hw_channel.c | 12 +++++ drivers/net/ethernet/microsoft/mana/mana_en.c | 54 +++++++++++++++++-- include/net/mana/gdma.h | 4 +- include/net/mana/hw_channel.h | 2 + include/net/mana/mana.h | 4 ++ 6 files changed, 71 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 43f034e180c41..effe0a2f207aa 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -528,6 +528,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) case GDMA_EQE_HWC_INIT_DONE: case GDMA_EQE_HWC_SOC_SERVICE: case GDMA_EQE_RNIC_QP_FATAL: + case GDMA_EQE_HWC_SOC_RECONFIG_DATA: if (!eq->eq.callback) break; diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index ada6c78a2bef4..aa4e2731e2ba7 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -118,6 +118,7 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, struct gdma_dev *gd = hwc->gdma_dev; union hwc_init_type_data type_data; union hwc_init_eq_id_db eq_db; + struct mana_context *ac; u32 type, val; int ret; @@ -196,6 +197,17 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, hwc->hwc_timeout = val; break; + case HWC_DATA_HW_LINK_CONNECT: + case HWC_DATA_HW_LINK_DISCONNECT: + ac = gd->gdma_context->mana.driver_data; + if (!ac) + break; + + WRITE_ONCE(ac->link_event, type); + schedule_work(&ac->link_change_work); + + break; + default: dev_warn(hwc->dev, "Received unknown reconfig type %u\n", type); break; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c 
b/drivers/net/ethernet/microsoft/mana/mana_en.c index 0142fd98392c2..739087081dfde 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -20,6 +20,7 @@ #include #include +#include static DEFINE_IDA(mana_adev_ida); @@ -84,7 +85,6 @@ static int mana_open(struct net_device *ndev) /* Ensure port state updated before txq state */ smp_wmb(); - netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); netdev_dbg(ndev, "%s successful\n", __func__); return 0; @@ -100,6 +100,46 @@ static int mana_close(struct net_device *ndev) return mana_detach(ndev, true); } +static void mana_link_state_handle(struct work_struct *w) +{ + struct mana_context *ac; + struct net_device *ndev; + u32 link_event; + bool link_up; + int i; + + ac = container_of(w, struct mana_context, link_change_work); + + rtnl_lock(); + + link_event = READ_ONCE(ac->link_event); + + if (link_event == HWC_DATA_HW_LINK_CONNECT) + link_up = true; + else if (link_event == HWC_DATA_HW_LINK_DISCONNECT) + link_up = false; + else + goto out; + + /* Process all ports */ + for (i = 0; i < ac->num_ports; i++) { + ndev = ac->ports[i]; + if (!ndev) + continue; + + if (link_up) { + netif_carrier_on(ndev); + + __netdev_notify_peers(ndev); + } else { + netif_carrier_off(ndev); + } + } + +out: + rtnl_unlock(); +} + static bool mana_can_tx(struct gdma_queue *wq) { return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; @@ -3059,9 +3099,6 @@ int mana_attach(struct net_device *ndev) /* Ensure port state updated before txq state */ smp_wmb(); - if (apc->port_is_up) - netif_carrier_on(ndev); - netif_device_attach(ndev); return 0; @@ -3154,7 +3191,6 @@ int mana_detach(struct net_device *ndev, bool from_close) smp_wmb(); netif_tx_disable(ndev); - netif_carrier_off(ndev); if (apc->port_st_save) { err = mana_dealloc_queues(ndev); @@ -3243,6 +3279,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, goto free_indir; } + netif_carrier_on(ndev); + 
debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, &apc->speed); return 0; @@ -3431,6 +3469,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { ac->num_ports = num_ports; + + INIT_WORK(&ac->link_change_work, mana_link_state_handle); } else { if (ac->num_ports != num_ports) { dev_err(dev, "The number of vPorts changed: %d->%d\n", @@ -3438,6 +3478,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming) err = -EPROTO; goto out; } + + enable_work(&ac->link_change_work); } if (ac->num_ports == 0) @@ -3500,6 +3542,8 @@ void mana_remove(struct gdma_dev *gd, bool suspending) int err; int i; + disable_work_sync(&ac->link_change_work); + /* adev currently doesn't support suspending, always remove it */ if (gd->adev) remove_adev(gd); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 57df78cfbf82c..637f42485dba6 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -590,6 +590,7 @@ enum { /* Driver can self reset on FPGA Reconfig EQE notification */ #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) +#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6) #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ @@ -599,7 +600,8 @@ enum { GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \ GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ - GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) + GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \ + GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h index 83cf93338eb38..16feb39616c1b 100644 --- a/include/net/mana/hw_channel.h +++ b/include/net/mana/hw_channel.h @@ -24,6 +24,8 @@ #define HWC_INIT_DATA_PF_DEST_CQ_ID 11 #define HWC_DATA_CFG_HWC_TIMEOUT 1 +#define HWC_DATA_HW_LINK_CONNECT 2 +#define HWC_DATA_HW_LINK_DISCONNECT 3 #define HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS 30000 diff --git a/include/net/mana/mana.h 
b/include/net/mana/mana.h index 0921485565c05..8906901535f55 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -477,6 +477,10 @@ struct mana_context { struct dentry *mana_eqs_debugfs; struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; + + /* Link state change work */ + struct work_struct link_change_work; + u32 link_event; }; struct mana_port_context { From 5a89b27afd3d010680f9355f7ff5b048cfe89333 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 29 Oct 2025 10:38:13 +0200 Subject: [PATCH 437/867] ptp: Allow exposing cycles only for clocks with free-running counter The PTP core falls back to gettimex64 and getcrosststamp when getcycles64 or getcyclesx64 are not implemented. This causes the CYCLES ioctls to retrieve PHC real time instead of free-running cycles. Reject PTP_SYS_OFFSET_{PRECISE,EXTENDED}_CYCLES for clocks without free-running counter support since the result would represent PHC real time and system time rather than cycles and system time. Fixes: faf23f54d366 ("ptp: Add ioctl commands to expose raw cycle counter values") Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Reviewed-by: Pavan Chebbi Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251029083813.2276997-1-cjubran@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 8106eb617c8c4..c61cf9edac48b 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -561,10 +561,14 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, return ptp_mask_en_single(pccontext->private_clkdata, argptr); case PTP_SYS_OFFSET_PRECISE_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_precise(ptp, argptr, ptp->info->getcrosscycles); case PTP_SYS_OFFSET_EXTENDED_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_extended(ptp, 
argptr, ptp->info->getcyclesx64); default: From 3d18a84eddde169d6dbf3c72cc5358b988c347d0 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 27 Oct 2025 20:46:21 +0100 Subject: [PATCH 438/867] net: dsa: tag_brcm: legacy: fix untagged rx on unbridged ports for bcm63xx The internal switch on BCM63XX SoCs will unconditionally add 802.1Q VLAN tags on egress to CPU when 802.1Q mode is enabled. We do this unconditionally since commit ed409f3bbaa5 ("net: dsa: b53: Configure VLANs while not filtering"). This is fine for VLAN aware bridges, but for standalone ports and vlan unaware bridges this means all packets are tagged with the default VID, which is 0. While the kernel will treat that like untagged, this can break userspace applications processing raw packets, expecting untagged traffic, like STP daemons. This also breaks several bridge tests, where the tcpdump output then does not match the expected output anymore. Since 0 isn't a valid VID, just strip out the VLAN tag if we encounter it, unless the priority field is set, since that would be a valid tag again. 
Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Signed-off-by: Jonas Gorski Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251027194621.133301-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 26bb657ceac36..d9c77fa553b53 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -224,12 +224,14 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, { int len = BRCM_LEG_TAG_LEN; int source_port; + __be16 *proto; u8 *brcm_tag; if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); + proto = (__be16 *)(brcm_tag + BRCM_LEG_TAG_LEN); source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; @@ -237,8 +239,12 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - /* VLAN tag is added by BCM63xx internal switch */ - if (netdev_uses_dsa(skb->dev)) + /* The internal switch in BCM63XX SoCs always tags on egress on the CPU + * port. We use VID 0 internally for untagged traffic, so strip the tag + * if the TCI field is all 0, and keep it otherwise to also retain + * e.g. 802.1p tagged packets. + */ + if (proto[0] == htons(ETH_P_8021Q) && proto[1] == 0) len += VLAN_HLEN; /* Remove Broadcom tag and update checksum */ From 7ea4376b3972d89385599307d1ad4f20eb763a05 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 30 Oct 2025 12:25:05 +0200 Subject: [PATCH 439/867] net/mlx5e: Remove redundant tstamp pointer from channel structures Remove the tstamp pointer field from mlx5e_channel, mlx5e_ptp, and mlx5e_trap structures, since it was only used to reference the tstamp field in the priv structure. Instead, directly use the tstamp field from priv when initializing RQ structures. Also remove the unused hwtstamp_config field from mlx5_clock structure as part of the cleanup. 
Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761819910-1011051-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/trap.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h | 1 - 8 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 14e3207b14e74..5485cf0149267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -784,7 +784,6 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; - struct hwtstamp_config *tstamp; DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int vec_ix; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index c93ee969ea647..96a78b6d4904b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -896,7 +896,6 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->priv = priv; c->mdev = priv->mdev; - c->tstamp = &priv->tstamp; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 1b3c9648220b3..1c0e0a86a9ac8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -64,7 +64,6 
@@ struct mlx5e_ptp { /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; - struct hwtstamp_config *tstamp; DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); struct mlx5_sq_bfreg *bfreg; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 996fcdb5a29d0..db6932b0cedfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -144,7 +144,6 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) t->priv = priv; t->mdev = priv->mdev; - t->tstamp = &priv->tstamp; t->pdev = mlx5_core_dma_dev(priv->mdev); t->netdev = priv->netdev; t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h index aa3f17658c6d4..394e917ea2b0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h @@ -22,7 +22,6 @@ struct mlx5e_trap { /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; - struct hwtstamp_config *tstamp; DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); struct mlx5e_params params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index dbd88eb5c082a..dc5a4afa4974c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -71,7 +71,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->pdev = c->pdev; rq->netdev = c->netdev; rq->priv = c->priv; - rq->tstamp = c->tstamp; + rq->tstamp = &c->priv->tstamp; rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9c46511e7b437..20f55542433d3 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -735,7 +735,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->pdev = c->pdev; rq->netdev = c->netdev; rq->priv = c->priv; - rq->tstamp = c->tstamp; + rq->tstamp = &c->priv->tstamp; rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; @@ -2803,7 +2803,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->priv = priv; c->mdev = mdev; - c->tstamp = &priv->tstamp; c->ix = ix; c->vec_ix = vec_ix; c->sd_ix = mlx5_sd_ch_ix_get_dev_ix(mdev, ix); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index c18a652c0faa1..aff3aed62c748 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -54,7 +54,6 @@ struct mlx5_timer { struct mlx5_clock { seqlock_t lock; - struct hwtstamp_config hwtstamp_config; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; struct mlx5_pps pps_info; From bf791659743b1a8e20f5810b1ac893b7b24f650e Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 30 Oct 2025 12:25:06 +0200 Subject: [PATCH 440/867] net/mlx5e: Remove unnecessary tstamp local variable in mlx5i_complete_rx_cqe Remove the tstamp local variable in mlx5i_complete_rx_cqe() and directly pass the tstamp field from priv to mlx5e_rx_hw_stamp(). The local variable was only used once and provided no additional value. 
Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761819910-1011051-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 26621a2972ec2..2fc1ec76770da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2650,7 +2650,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - struct hwtstamp_config *tstamp; struct mlx5e_rq_stats *stats; struct net_device *netdev; struct mlx5e_priv *priv; @@ -2674,7 +2673,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, } priv = mlx5i_epriv(netdev); - tstamp = &priv->tstamp; stats = &priv->channel_stats[rq->ix]->rq; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); @@ -2710,7 +2708,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, stats->csum_none++; } - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + if (unlikely(mlx5e_rx_hw_stamp(&priv->tstamp))) skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); From fee182371a59414d43633a5ea6f1cda160418a16 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 30 Oct 2025 12:25:07 +0200 Subject: [PATCH 441/867] net/mlx5e: Rename hwstamp functions to hwtstamp Rename mlx5e_hwstamp_set/get() functions to mlx5e_hwtstamp_set/get() to better reflect that these functions handle hardware timestamping, not just hardware stamping. 
Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761819910-1011051-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5485cf0149267..ebd7493888d73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1026,8 +1026,8 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, u64 *buf); void mlx5e_set_rx_mode_work(struct work_struct *work); -int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); -int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 20f55542433d3..2ecbd735584e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4740,7 +4740,7 @@ static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) &new_params.ptp_rx, true); } -int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { struct hwtstamp_config config; bool rx_cqe_compress_def; @@ -4818,7 +4818,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) return err; } -int 
mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) { struct hwtstamp_config *cfg = &priv->tstamp; @@ -4834,9 +4834,9 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSHWTSTAMP: - return mlx5e_hwstamp_set(priv, ifr); + return mlx5e_hwtstamp_set(priv, ifr); case SIOCGHWTSTAMP: - return mlx5e_hwstamp_get(priv, ifr); + return mlx5e_hwtstamp_get(priv, ifr); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 79ae3a51a4b3e..11d950f58ae33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -563,9 +563,9 @@ int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSHWTSTAMP: - return mlx5e_hwstamp_set(priv, ifr); + return mlx5e_hwtstamp_set(priv, ifr); case SIOCGHWTSTAMP: - return mlx5e_hwstamp_get(priv, ifr); + return mlx5e_hwtstamp_get(priv, ifr); default: return -EOPNOTSUPP; } From 91baaf96f5d0b764aec462dd50a8433f5c8d621f Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 30 Oct 2025 12:25:08 +0200 Subject: [PATCH 442/867] net/mlx5e: Rename timestamp fields to hwtstamp_config Rename hardware timestamp-related fields from 'tstamp' to 'hwtstamp_config' throughout the MLX5 driver. The new name is more descriptive as it clearly indicates these fields contain hardware timestamp configuration. 
Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761819910-1011051-5-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++-- 9 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ebd7493888d73..eb3eef1a496e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -696,7 +696,7 @@ struct mlx5e_rq { struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; struct mlx5e_cq_decomp cqd; - struct hwtstamp_config *tstamp; + struct hwtstamp_config *hwtstamp_config; struct mlx5_clock *clock; struct mlx5e_icosq *icosq; struct mlx5e_priv *priv; @@ -917,7 +917,7 @@ struct mlx5e_priv { u8 max_opened_tc; bool tx_ptp_opened; bool rx_ptp_opened; - struct hwtstamp_config tstamp; + struct hwtstamp_config hwtstamp_config; u16 q_counter[MLX5_SD_MAX_GROUP_SZ]; u16 drop_rq_q_counter; struct notifier_block events_nb; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 96a78b6d4904b..12e10feb30f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -713,7 +713,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, rq->netdev = priv->netdev; rq->priv = priv; rq->clock = 
mdev->clock; - rq->tstamp = &priv->tstamp; + rq->hwtstamp_config = &priv->hwtstamp_config; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->stats = &c->priv->ptp_stats.rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index b1415992ffa24..0686fbdd5a059 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -318,7 +318,8 @@ mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx struct devlink_fmsg *fmsg) { mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); - devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter); + devlink_fmsg_u32_pair_put(fmsg, "filter_type", + priv->hwtstamp_config.rx_filter); mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg); mlx5e_health_fmsg_named_obj_nest_end(fmsg); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index db6932b0cedfa..da8c44f46edb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -47,7 +47,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params rq->netdev = priv->netdev; rq->priv = priv; rq->clock = mdev->clock; - rq->tstamp = &priv->tstamp; + rq->hwtstamp_config = &priv->hwtstamp_config; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->stats = &priv->trap_stats.rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 5d51600935a6f..80f9fc10877ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -179,7 +179,7 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) { const struct mlx5e_xdp_buff *_ctx = (void *)ctx; - if 
(unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp))) + if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->hwtstamp_config))) return -ENODATA; *timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index dc5a4afa4974c..5981c71cae2d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -71,7 +71,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->pdev = c->pdev; rq->netdev = c->netdev; rq->priv = c->priv; - rq->tstamp = &c->priv->tstamp; + rq->hwtstamp_config = &c->priv->hwtstamp_config; rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 53e5ae252eac5..47a3770fb0f74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2273,7 +2273,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE; + rx_filter = priv->hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE; err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2ecbd735584e0..5b2491e19baa7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -735,7 +735,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->pdev = c->pdev; rq->netdev = c->netdev; rq->priv = c->priv; - rq->tstamp = &c->priv->tstamp; + rq->hwtstamp_config = &c->priv->hwtstamp_config; rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; @@ 
-3444,8 +3444,8 @@ int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) void mlx5e_timestamp_init(struct mlx5e_priv *priv) { - priv->tstamp.tx_type = HWTSTAMP_TX_OFF; - priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; + priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; + priv->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; } static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev, @@ -4805,7 +4805,7 @@ int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) if (err) goto err_unlock; - memcpy(&priv->tstamp, &config, sizeof(config)); + memcpy(&priv->hwtstamp_config, &config, sizeof(config)); mutex_unlock(&priv->state_lock); /* might need to fix some features */ @@ -4820,7 +4820,7 @@ int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) { - struct hwtstamp_config *cfg = &priv->tstamp; + struct hwtstamp_config *cfg = &priv->hwtstamp_config; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 2fc1ec76770da..6c8ce1d0d2333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1602,7 +1602,7 @@ static inline bool mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, stats->lro_bytes += cqe_bcnt; } - if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + if (unlikely(mlx5e_rx_hw_stamp(rq->hwtstamp_config))) skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); @@ -2708,7 +2708,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, stats->csum_none++; } - if (unlikely(mlx5e_rx_hw_stamp(&priv->tstamp))) + if (unlikely(mlx5e_rx_hw_stamp(&priv->hwtstamp_config))) skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); From 
250da3c8fe811b14fd5e610760ed7469166fd0f7 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 30 Oct 2025 12:25:09 +0200 Subject: [PATCH 443/867] IB/IPoIB: Add support for hwtstamp get/set ndos Add support for the ndo_hwtstamp_get and ndo_hwtstamp_set operations in IPoIB. This allows lower devices to handle hardware timestamp configuration through the new ndos instead of the legacy ioctls. Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761819910-1011051-6-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5b4d76e97437d..300afc27c5612 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1825,6 +1825,31 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd); } +static int ipoib_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->rn_ops->ndo_hwtstamp_get) + /* legacy */ + return dev_eth_ioctl(dev, config->ifr, SIOCGHWTSTAMP); + + return priv->rn_ops->ndo_hwtstamp_get(dev, config); +} + +static int ipoib_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->rn_ops->ndo_hwtstamp_set) + /* legacy */ + return dev_eth_ioctl(dev, config->ifr, SIOCSHWTSTAMP); + + return priv->rn_ops->ndo_hwtstamp_set(dev, config, extack); +} + static int ipoib_dev_init(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -2149,6 +2174,8 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, 
.ndo_eth_ioctl = ipoib_ioctl, + .ndo_hwtstamp_get = ipoib_hwtstamp_get, + .ndo_hwtstamp_set = ipoib_hwtstamp_set, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -2164,6 +2191,8 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_get_iflink = ipoib_get_iflink, .ndo_get_stats64 = ipoib_get_stats, .ndo_eth_ioctl = ipoib_ioctl, + .ndo_hwtstamp_get = ipoib_hwtstamp_get, + .ndo_hwtstamp_set = ipoib_hwtstamp_set, }; static const struct net_device_ops ipoib_netdev_default_pf = { From 1c7fe48a90158486a66665f1401d0b90bd390ef0 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 30 Oct 2025 12:25:10 +0200 Subject: [PATCH 444/867] net/mlx5e: Convert to new hwtstamp_get/set interface Migrate from the legacy ioctl hardware timestamping interface to the ndo_hwtstamp_get/set operations. Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761819910-1011051-7-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 ++-- .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 57 ++++++++++--------- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 34 ++++++----- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 6 +- .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 +-- 6 files changed, 65 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index eb3eef1a496e2..fd107906bc28b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -696,7 +696,7 @@ struct mlx5e_rq { struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; struct mlx5e_cq_decomp cqd; - struct hwtstamp_config *hwtstamp_config; + struct kernel_hwtstamp_config *hwtstamp_config; struct mlx5_clock *clock; struct mlx5e_icosq *icosq; struct mlx5e_priv *priv; @@ -917,7 +917,7 @@ struct mlx5e_priv { u8 
max_opened_tc; bool tx_ptp_opened; bool rx_ptp_opened; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; u16 q_counter[MLX5_SD_MAX_GROUP_SZ]; u16 drop_rq_q_counter; struct notifier_block events_nb; @@ -1026,8 +1026,11 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, u64 *buf); void mlx5e_set_rx_mode_work(struct work_struct *work); -int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); -int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); +int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, + struct kernel_hwtstamp_config *config); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 6760bb0336df9..7e191e1569e82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -92,7 +92,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq); -static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) +static inline bool mlx5e_rx_hw_stamp(struct kernel_hwtstamp_config *config) { return config->rx_filter == HWTSTAMP_FILTER_ALL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5b2491e19baa7..6b905848fe86e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4740,22 +4740,23 @@ static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) &new_params.ptp_rx, true); } -int 
mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; bool rx_cqe_compress_def; bool ptp_rx; int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || - (mlx5_clock_get_ptp_index(priv->mdev) == -1)) + (mlx5_clock_get_ptp_index(priv->mdev) == -1)) { + NL_SET_ERR_MSG_MOD(extack, + "Timestamps are not supported on this device"); return -EOPNOTSUPP; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + } /* TX HW timestamp */ - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: break; @@ -4767,7 +4768,7 @@ int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; /* RX HW timestamp */ - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: ptp_rx = false; break; @@ -4786,7 +4787,7 @@ int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; /* ptp_rx is set if both HW TS is set and CQE * compression is set */ @@ -4799,47 +4800,50 @@ int mlx5e_hwtstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) err = mlx5e_hwstamp_config_no_ptp_rx(priv, - config.rx_filter != HWTSTAMP_FILTER_NONE); + config->rx_filter != HWTSTAMP_FILTER_NONE); else err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx); if (err) goto err_unlock; - memcpy(&priv->hwtstamp_config, &config, sizeof(config)); + priv->hwtstamp_config = *config; mutex_unlock(&priv->state_lock); /* might need to fix some features */ netdev_update_features(priv->netdev); - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? 
-EFAULT : 0; + return 0; err_unlock: mutex_unlock(&priv->state_lock); return err; } -int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +static int mlx5e_hwtstamp_set_ndo(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config *cfg = &priv->hwtstamp_config; + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_hwtstamp_set(priv, config, extack); +} +int mlx5e_hwtstamp_get(struct mlx5e_priv *priv, + struct kernel_hwtstamp_config *config) +{ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return -EOPNOTSUPP; - return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; + *config = priv->hwtstamp_config; + + return 0; } -static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int mlx5e_hwtstamp_get_ndo(struct net_device *dev, + struct kernel_hwtstamp_config *config) { struct mlx5e_priv *priv = netdev_priv(dev); - switch (cmd) { - case SIOCSHWTSTAMP: - return mlx5e_hwtstamp_set(priv, ifr); - case SIOCGHWTSTAMP: - return mlx5e_hwtstamp_get(priv, ifr); - default: - return -EOPNOTSUPP; - } + return mlx5e_hwtstamp_get(priv, config); } #ifdef CONFIG_MLX5_ESWITCH @@ -5280,13 +5284,14 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_set_features = mlx5e_set_features, .ndo_fix_features = mlx5e_fix_features, .ndo_change_mtu = mlx5e_change_nic_mtu, - .ndo_eth_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, .ndo_features_check = mlx5e_features_check, .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, .ndo_xsk_wakeup = mlx5e_xsk_wakeup, + .ndo_hwtstamp_get = mlx5e_hwtstamp_get_ndo, + .ndo_hwtstamp_set = mlx5e_hwtstamp_set_ndo, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 11d950f58ae33..906b1fbc27aa0 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -45,6 +45,23 @@ static int mlx5i_open(struct net_device *netdev); static int mlx5i_close(struct net_device *netdev); static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); +int mlx5i_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + return mlx5e_hwtstamp_set(epriv, config, extack); +} + +int mlx5i_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + return mlx5e_hwtstamp_get(epriv, config); +} + static const struct net_device_ops mlx5i_netdev_ops = { .ndo_open = mlx5i_open, .ndo_stop = mlx5i_close, @@ -52,7 +69,8 @@ static const struct net_device_ops mlx5i_netdev_ops = { .ndo_init = mlx5i_dev_init, .ndo_uninit = mlx5i_dev_cleanup, .ndo_change_mtu = mlx5i_change_mtu, - .ndo_eth_ioctl = mlx5i_ioctl, + .ndo_hwtstamp_get = mlx5i_hwtstamp_get, + .ndo_hwtstamp_set = mlx5i_hwtstamp_set, }; /* IPoIB mlx5 netdev profile */ @@ -557,20 +575,6 @@ int mlx5i_dev_init(struct net_device *dev) return 0; } -int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct mlx5e_priv *priv = mlx5i_epriv(dev); - - switch (cmd) { - case SIOCSHWTSTAMP: - return mlx5e_hwtstamp_set(priv, ifr); - case SIOCGHWTSTAMP: - return mlx5e_hwtstamp_get(priv, ifr); - default: - return -EOPNOTSUPP; - } -} - void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 2ab6437a1c49f..d67d5a72bb419 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -88,7 +88,11 @@ struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, 
u32 qpn); /* Shared ndo functions */ int mlx5i_dev_init(struct net_device *dev); void mlx5i_dev_cleanup(struct net_device *dev); -int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int mlx5i_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); +int mlx5i_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config); /* Parent profile functions */ int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 028a76944d82d..04444dad3a0db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -140,7 +140,6 @@ static int mlx5i_pkey_close(struct net_device *netdev); static int mlx5i_pkey_dev_init(struct net_device *dev); static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); -static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static const struct net_device_ops mlx5i_pkey_netdev_ops = { .ndo_open = mlx5i_pkey_open, @@ -149,7 +148,8 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = { .ndo_get_stats64 = mlx5i_get_stats, .ndo_uninit = mlx5i_pkey_dev_cleanup, .ndo_change_mtu = mlx5i_pkey_change_mtu, - .ndo_eth_ioctl = mlx5i_pkey_ioctl, + .ndo_hwtstamp_get = mlx5i_hwtstamp_get, + .ndo_hwtstamp_set = mlx5i_hwtstamp_set, }; /* Child NDOs */ @@ -184,11 +184,6 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) return mlx5i_dev_init(dev); } -static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - return mlx5i_ioctl(dev, ifr, cmd); -} - static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) { mlx5i_parent_put(netdev); From 9b443e58a896fce4d377d83da8dfd083664d8739 Mon Sep 17 00:00:00 2001 From: 
"Russell King (Oracle)" Date: Thu, 30 Oct 2025 10:20:32 +0000 Subject: [PATCH 445/867] net: stmmac: qcom-ethqos: remove MAC_CTRL_REG modification When operating in "SGMII" mode (Cisco SGMII or 2500BASE-X), qcom-ethqos modifies the MAC control register in its ethqos_configure_sgmii() function, which is only called from one path: stmmac_mac_link_up() +- reads MAC_CTRL_REG +- masks out priv->hw->link.speed_mask +- sets bits according to speed (2500, 1000, 100, 10) from priv->hw.link.speed* +- ethqos_fix_mac_speed() | +- qcom_ethqos_set_sgmii_loopback(false) | +- ethqos_update_link_clk(speed) | `- ethqos_configure(speed) | `- ethqos_configure_sgmii(speed) | +- reads MAC_CTRL_REG, | +- configures PS/FES bits according to speed | `- writes MAC_CTRL_REG as the last operation +- sets duplex bit(s) +- stmmac_mac_flow_ctrl() +- writes MAC_CTRL_REG if changed from original read ... As can be seen, the modification of the control register that stmmac_mac_link_up() overwrites the changes that ethqos_fix_mac_speed() does to the register. This makes ethqos_configure_sgmii()'s modification questionable at best. Analysing the values written, GMAC4 sets the speed bits as: speed_mask = GMAC_CONFIG_FES | GMAC_CONFIG_PS speed2500 = GMAC_CONFIG_FES B14=1 B15=0 speed1000 = 0 B14=0 B15=0 speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS B14=1 B15=1 speed10 = GMAC_CONFIG_PS B14=0 B15=1 Whereas ethqos_configure_sgmii(): 2500: clears ETHQOS_MAC_CTRL_PORT_SEL B14=X B15=0 1000: clears ETHQOS_MAC_CTRL_PORT_SEL B14=X B15=0 100: sets ETHQOS_MAC_CTRL_PORT_SEL | B14=1 B15=1 ETHQOS_MAC_CTRL_SPEED_MODE 10: sets ETHQOS_MAC_CTRL_PORT_SEL B14=0 B15=1 clears ETHQOS_MAC_CTRL_SPEED_MODE Thus, they appear to be doing very similar, with the exception of the FES bit (bit 14) for 1G and 2.5G speeds. Given that stmmac_mac_link_up() will write the MAC_CTRL_REG after ethqos_configure_sgmii(), remove the unnecessary update in the glue driver's ethqos_configure_sgmii() method, simplifying the code. 
Konrad states: Without any additional knowledge, the register description says: 2500: B14=1 B15=0 1000: B14=0 B15=0 100: B14=1 B15=1 10: B14=0 B15=1 Tested-by: Mohd Ayaan Anwar Reviewed-by: Konrad Dybcio Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vEPlg-0000000CFHY-282A@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index d1e48b524d7a9..1a616a71c36ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -76,10 +76,6 @@ #define RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL BIT(6) #define RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN BIT(5) -/* MAC_CTRL_REG bits */ -#define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14) -#define ETHQOS_MAC_CTRL_PORT_SEL BIT(15) - /* EMAC_WRAPPER_SGMII_PHY_CNTRL1 bits */ #define SGMII_PHY_CNTRL1_SGMII_TX_TO_RX_LOOPBACK_EN BIT(3) @@ -632,13 +628,9 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) { struct net_device *dev = platform_get_drvdata(ethqos->pdev); struct stmmac_priv *priv = netdev_priv(dev); - int val; - - val = readl(ethqos->mac_base + MAC_CTRL_REG); switch (speed) { case SPEED_2500: - val &= ~ETHQOS_MAC_CTRL_PORT_SEL; rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, RGMII_IO_MACRO_CONFIG2); @@ -646,7 +638,6 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) ethqos_pcs_set_inband(priv, false); break; case SPEED_1000: - val &= ~ETHQOS_MAC_CTRL_PORT_SEL; rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, RGMII_IO_MACRO_CONFIG2); @@ -654,13 +645,10 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) ethqos_pcs_set_inband(priv, true); break; case SPEED_100: - 
val |= ETHQOS_MAC_CTRL_PORT_SEL | ETHQOS_MAC_CTRL_SPEED_MODE; ethqos_set_serdes_speed(ethqos, SPEED_1000); ethqos_pcs_set_inband(priv, true); break; case SPEED_10: - val |= ETHQOS_MAC_CTRL_PORT_SEL; - val &= ~ETHQOS_MAC_CTRL_SPEED_MODE; rgmii_updatel(ethqos, RGMII_CONFIG_SGMII_CLK_DVDR, FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR, SGMII_10M_RX_CLK_DVDR), @@ -670,9 +658,7 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) break; } - writel(val, ethqos->mac_base + MAC_CTRL_REG); - - return val; + return 0; } static int ethqos_configure(struct qcom_ethqos *ethqos, int speed) From e7e756779afa102fc8f9d648ccdc2ecbc41ce2f8 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Thu, 30 Oct 2025 15:52:57 +0530 Subject: [PATCH 446/867] net: phy: microchip_t1s: add support for Microchip LAN867X Rev.D0 PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the LAN8670/1/2 Rev.D0 10BASE-T1S PHYs from Microchip. The new Rev.D0 silicon requires a specific set of initialization settings to be configured for optimal performance and compliance with OPEN Alliance specifications, as described in Microchip Application Note AN1699 (Revision G, DS60001699G – October 2025). 
https://www.microchip.com/en-us/application-notes/an1699 Signed-off-by: Parthiban Veerasooran Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251030102258.180061-2-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/Kconfig | 2 +- drivers/net/phy/microchip_t1s.c | 47 ++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 98700d069191a..a7ade7b95a2ee 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -308,7 +308,7 @@ config MICREL_PHY config MICROCHIP_T1S_PHY tristate "Microchip 10BASE-T1S Ethernet PHYs" help - Currently supports the LAN8670/1/2 Rev.B1/C1/C2 and + Currently supports the LAN8670/1/2 Rev.B1/C1/C2/D0 and LAN8650/1 Rev.B0/B1 Internal PHYs. config MICROCHIP_PHY diff --git a/drivers/net/phy/microchip_t1s.c b/drivers/net/phy/microchip_t1s.c index e50a0c102a862..03e3bacb02bd1 100644 --- a/drivers/net/phy/microchip_t1s.c +++ b/drivers/net/phy/microchip_t1s.c @@ -3,7 +3,7 @@ * Driver for Microchip 10BASE-T1S PHYs * * Support: Microchip Phys: - * lan8670/1/2 Rev.B1/C1/C2 + * lan8670/1/2 Rev.B1/C1/C2/D0 * lan8650/1 Rev.B0/B1 Internal PHYs */ @@ -14,6 +14,7 @@ #define PHY_ID_LAN867X_REVB1 0x0007C162 #define PHY_ID_LAN867X_REVC1 0x0007C164 #define PHY_ID_LAN867X_REVC2 0x0007C165 +#define PHY_ID_LAN867X_REVD0 0x0007C166 /* Both Rev.B0 and B1 clause 22 PHYID's are same due to B1 chip limitation */ #define PHY_ID_LAN865X_REVB 0x0007C1B3 @@ -109,6 +110,21 @@ static const u16 lan865x_revb_sqi_fixup_cfg_regs[3] = { 0x00AD, 0x00AE, 0x00AF, }; +/* LAN867x Rev.D0 configuration parameters from AN1699 + * As per the Configuration Application Note AN1699 published in the below link, + * https://www.microchip.com/en-us/application-notes/an1699 + * Revision G (DS60001699G - October 2025) + */ +static const u16 lan867x_revd0_fixup_regs[8] = { + 0x0037, 0x008A, 0x0118, 0x00D6, + 0x0082, 0x00FD, 0x00FD, 0x0091, +}; + 
+static const u16 lan867x_revd0_fixup_values[8] = { + 0x0800, 0xBFC0, 0x029C, 0x1001, + 0x001C, 0x0C0B, 0x8C07, 0x9660, +}; + /* Pulled from AN1760 describing 'indirect read' * * write_register(0x4, 0x00D8, addr) @@ -407,6 +423,25 @@ static int lan86xx_plca_set_cfg(struct phy_device *phydev, COL_DET_CTRL0_ENABLE_BIT_MASK, COL_DET_ENABLE); } +static int lan867x_revd0_config_init(struct phy_device *phydev) +{ + int ret; + + ret = lan867x_check_reset_complete(phydev); + if (ret) + return ret; + + for (int i = 0; i < ARRAY_SIZE(lan867x_revd0_fixup_regs); i++) { + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + lan867x_revd0_fixup_regs[i], + lan867x_revd0_fixup_values[i]); + if (ret) + return ret; + } + + return 0; +} + static int lan86xx_read_status(struct phy_device *phydev) { /* The phy has some limitations, namely: @@ -481,6 +516,15 @@ static struct phy_driver microchip_t1s_driver[] = { .set_plca_cfg = lan86xx_plca_set_cfg, .get_plca_status = genphy_c45_plca_get_status, }, + { + PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVD0), + .name = "LAN867X Rev.D0", + .features = PHY_BASIC_T1S_P2MP_FEATURES, + .config_init = lan867x_revd0_config_init, + .get_plca_cfg = genphy_c45_plca_get_cfg, + .set_plca_cfg = lan86xx_plca_set_cfg, + .get_plca_status = genphy_c45_plca_get_status, + }, { PHY_ID_MATCH_EXACT(PHY_ID_LAN865X_REVB), .name = "LAN865X Rev.B0/B1 Internal Phy", @@ -501,6 +545,7 @@ static const struct mdio_device_id __maybe_unused tbl[] = { { PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVB1) }, { PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVC1) }, { PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVC2) }, + { PHY_ID_MATCH_EXACT(PHY_ID_LAN867X_REVD0) }, { PHY_ID_MATCH_EXACT(PHY_ID_LAN865X_REVB) }, { } }; From 07f5765f26c3c7381a59d37c73d3ec51b4fd5cf0 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Thu, 30 Oct 2025 15:52:58 +0530 Subject: [PATCH 447/867] net: phy: microchip_t1s: configure link status control for LAN867x Rev.D0 Configure the link status in the Link Status Control register for 
LAN8670/1/2 Rev.D0 PHYs, depending on whether PLCA or CSMA/CD mode is enabled. When PLCA is enabled, the link status reflects the PLCA status. When PLCA is disabled (CSMA/CD mode), the PHY does not support autonegotiation, so the link status is forced active by setting the LINK_STATUS_SEMAPHORE bit. The link status control is configured: - During PHY initialization, for default CSMA/CD mode. - Whenever PLCA configuration is updated. This ensures correct link reporting and consistent behavior for LAN867x Rev.D0 devices. Signed-off-by: Parthiban Veerasooran Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251030102258.180061-3-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip_t1s.c | 51 ++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/microchip_t1s.c b/drivers/net/phy/microchip_t1s.c index 03e3bacb02bd1..bce5cf087b194 100644 --- a/drivers/net/phy/microchip_t1s.c +++ b/drivers/net/phy/microchip_t1s.c @@ -33,6 +33,17 @@ #define COL_DET_ENABLE BIT(15) #define COL_DET_DISABLE 0x0000 +/* LAN8670/1/2 Rev.D0 Link Status Selection Register */ +#define LAN867X_REG_LINK_STATUS_CTRL 0x0012 +#define LINK_STATUS_CONFIGURATION GENMASK(12, 11) +#define LINK_STATUS_SEMAPHORE BIT(0) + +/* Link Status Configuration */ +#define LINK_STATUS_CONFIG_PLCA_STATUS 0x1 +#define LINK_STATUS_CONFIG_SEMAPHORE 0x2 + +#define LINK_STATUS_SEMAPHORE_SET 0x1 + #define LAN865X_CFGPARAM_READ_ENABLE BIT(1) /* The arrays below are pulled from the following table from AN1699 @@ -393,6 +404,32 @@ static int lan867x_revb1_config_init(struct phy_device *phydev) return 0; } +static int lan867x_revd0_link_active_selection(struct phy_device *phydev, + bool plca_enabled) +{ + u16 value; + + if (plca_enabled) { + /* 0x1 - When PLCA is enabled: link status reflects plca_status. 
+ */ + value = FIELD_PREP(LINK_STATUS_CONFIGURATION, + LINK_STATUS_CONFIG_PLCA_STATUS); + } else { + /* 0x2 - Link status is controlled by the value written into the + * LINK_STATUS_SEMAPHORE bit written. Here the link semaphore + * bit is written with 0x1 to set the link always active in + * CSMA/CD mode as it doesn't support autoneg. + */ + value = FIELD_PREP(LINK_STATUS_CONFIGURATION, + LINK_STATUS_CONFIG_SEMAPHORE) | + FIELD_PREP(LINK_STATUS_SEMAPHORE, + LINK_STATUS_SEMAPHORE_SET); + } + + return phy_write_mmd(phydev, MDIO_MMD_VEND2, + LAN867X_REG_LINK_STATUS_CTRL, value); +} + /* As per LAN8650/1 Rev.B0/B1 AN1760 (Revision F (DS60001760G - June 2024)) and * LAN8670/1/2 Rev.C1/C2 AN1699 (Revision E (DS60001699F - June 2024)), under * normal operation, the device should be operated in PLCA mode. Disabling @@ -409,6 +446,14 @@ static int lan86xx_plca_set_cfg(struct phy_device *phydev, { int ret; + /* Link status selection must be configured for LAN8670/1/2 Rev.D0 */ + if (phydev->phy_id == PHY_ID_LAN867X_REVD0) { + ret = lan867x_revd0_link_active_selection(phydev, + plca_cfg->enabled); + if (ret) + return ret; + } + ret = genphy_c45_plca_set_cfg(phydev, plca_cfg); if (ret) return ret; @@ -439,7 +484,11 @@ static int lan867x_revd0_config_init(struct phy_device *phydev) return ret; } - return 0; + /* Initially the PHY will be in CSMA/CD mode by default. So it is + * required to set the link always active as it doesn't support + * autoneg. 
+ */ + return lan867x_revd0_link_active_selection(phydev, false); } static int lan86xx_read_status(struct phy_device *phydev) From a7aca10c0091d511030ec7907667e1448869b71c Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 30 Oct 2025 14:50:13 +0700 Subject: [PATCH 448/867] Documentation: netconsole: Separate literal code blocks for full and short netcat command name versions Both full and short (abbreviated) command name versions of the netcat example are combined in a single literal code block due to the 'or::' paragraph being indented one more space than the preceding paragraph (before the short version example). Unindent it to separate the versions. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20251030075013.40418-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/netconsole.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 2555e75e5cc1c..4ab5d7b05cf10 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -88,7 +88,7 @@ for example: nc -u -l -p ' / 'nc -u -l - or:: + or:: netcat -u -l -p ' / 'netcat -u -l From 02d064de05b1fcca769391fa82d205bed8bb9bf0 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:28:18 +0000 Subject: [PATCH 449/867] selftests/net: fix out-of-order delivery of FIN in gro:tcp test Due to the gro_sender sending data packets and FIN packets in very quick succession, these are received almost simultaneously by the gro_receiver. FIN packets are sometimes processed before the data packets leading to intermittent (~1/100) test failures. This change adds a delay of 100ms before sending FIN packets in the gro:tcp test to avoid the out-of-order delivery. The same mitigation already exists for the gro:ip test. 
Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030062818.1562228-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 2b1d9f2b3e9e8..3fa63bd85dea7 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -989,6 +989,7 @@ static void check_recv_pkts(int fd, int *correct_payload, static void gro_sender(void) { + const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; int txfd = -1; @@ -1032,15 +1033,22 @@ static void gro_sender(void) write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "tcp") == 0) { send_changed_checksum(txfd, &daddr); + /* Adding sleep before sending FIN so that it is not + * received prior to other packets. + */ + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "ip") == 0) { send_changed_ECN(txfd, &daddr); From f8e8486702abb05b8c734093aab1606af0eac068 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:04:36 +0000 Subject: [PATCH 450/867] selftests/net: use destination options instead of hop-by-hop The GRO self-test, gro.c, currently constructs IPv6 packets containing a Hop-by-Hop Options header (IPPROTO_HOPOPTS) to ensure the GRO path correctly handles IPv6 extension headers. However, network elements may be configured to drop packets with the Hop-by-Hop Options header (HBH). 
This causes the self-test to fail in environments where such network elements are present. To improve the robustness and reliability of this test in diverse network environments, switch from using IPPROTO_HOPOPTS to IPPROTO_DSTOPTS (Destination Options). The Destination Options header is less likely to be dropped by intermediate routers and still serves the core purpose of the test: validating GRO's handling of an IPv6 extension header. This change ensures the test can execute successfully without being incorrectly failed by network policies outside the kernel's control. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030060436.1556664-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 3fa63bd85dea7..cfc39f70635df 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -754,11 +754,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); } From 3f978e3f1570155a1327ffa25f60968bc7b9398f Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Thu, 30 Oct 2025 09:55:22 +0530 Subject: [PATCH 451/867] isdn: mISDN: hfcsusb: fix memory leak in hfcsusb_probe() In 
hfcsusb_probe(), the memory allocated for ctrl_urb gets leaked when setup_instance() fails with an error code. Fix that by freeing the urb before freeing the hw structure. Also change the error paths to use the goto ladder style. Compile tested only. Issue found using a prototype static analysis tool. Fixes: 69f52adb2d53 ("mISDN: Add HFC USB driver") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251030042524.194812-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcsusb.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index e54419a4e731f..541a20cb58f16 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1904,13 +1904,13 @@ setup_instance(struct hfcsusb *hw, struct device *parent) mISDN_freebchannel(&hw->bch[1]); mISDN_freebchannel(&hw->bch[0]); mISDN_freedchannel(&hw->dch); - kfree(hw); return err; } static int hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + int err; struct hfcsusb *hw; struct usb_device *dev = interface_to_usbdev(intf); struct usb_host_interface *iface = intf->cur_altsetting; @@ -2101,20 +2101,28 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) if (!hw->ctrl_urb) { pr_warn("%s: No memory for control urb\n", driver_info->vend_name); - kfree(hw); - return -ENOMEM; + err = -ENOMEM; + goto err_free_hw; } pr_info("%s: %s: detected \"%s\" (%s, if=%d alt=%d)\n", hw->name, __func__, driver_info->vend_name, conf_str[small_match], ifnum, alt_used); - if (setup_instance(hw, dev->dev.parent)) - return -EIO; + if (setup_instance(hw, dev->dev.parent)) { + err = -EIO; + goto err_free_urb; + } hw->intf = intf; usb_set_intfdata(hw->intf, hw); return 0; + +err_free_urb: + usb_free_urb(hw->ctrl_urb); +err_free_hw: + kfree(hw); + return err; } /* function called when an active device is removed */ 
From d01f8136d46b925798abcf86b35a4021e4cfb8bb Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 30 Oct 2025 12:03:40 +0800 Subject: [PATCH 452/867] selftests: netdevsim: Fix ethtool-coalesce.sh fail by installing ethtool-common.sh The script "ethtool-common.sh" is not installed in INSTALL_PATH, and triggers some errors when I try to run the test 'drivers/net/netdevsim/ethtool-coalesce.sh': TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # ./ethtool-coalesce.sh: line 4: ethtool-common.sh: No such file or directory # ./ethtool-coalesce.sh: line 25: make_netdev: command not found # ethtool: bad command line argument(s) # ./ethtool-coalesce.sh: line 124: check: command not found # ./ethtool-coalesce.sh: line 126: [: -eq: unary operator expected # FAILED /0 checks not ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh # exit=1 Install this file to avoid this error. After this patch: TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # PASSED all 22 checks ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh Fixes: fbb8531e58bd ("selftests: extract common functions in ethtool-common.sh") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20251030040340.3258110-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c8272..df10c72435119 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -20,4 +20,8 @@ TEST_PROGS := \ udp_tunnel_nic.sh \ # end of TEST_PROGS +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES + include ../../../lib.mk From c211f5d7cbd5cb34489d526648bb9c8ecc907dee Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Oct 
2025 07:35:39 +0000 Subject: [PATCH 453/867] net: vlan: sync VLAN features with lower device After registering a VLAN device and setting its feature flags, we need to synchronize the VLAN features with the lower device. For example, the VLAN device does not have the NETIF_F_LRO flag, it should be synchronized with the lower device based on the NETIF_F_UPPER_DISABLES definition. As the dev->vlan_features has changed, we need to call netdev_update_features(). The caller must run after netdev_upper_dev_link() links the lower devices, so this patch adds the netdev_update_features() call in register_vlan_dev(). Fixes: fd867d51f889 ("net/core: generic support for disabling netdev features down stack") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251030073539.133779-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index fda3a80e9340c..2b74ed56eb166 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -193,6 +193,8 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; + netdev_update_features(dev); + return 0; out_unregister_netdev: From d7d2fcf7ae31471b4e08b7e448b8fd0ec2e06a1b Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Wed, 29 Oct 2025 13:50:24 -0700 Subject: [PATCH 454/867] netconsole: Acquire su_mutex before navigating configs hierarchy There is a race between operations that iterate over the userdata cg_children list and concurrent add/remove of userdata items through configfs. The update_userdata() function iterates over the nt->userdata_group.cg_children list, and count_extradata_entries() also iterates over this same list to count nodes. Quoting from Documentation/filesystems/configfs.rst: > A subsystem can navigate the cg_children list and the ci_parent pointer > to see the tree created by the subsystem. 
This can race with configfs' > management of the hierarchy, so configfs uses the subsystem mutex to > protect modifications. Whenever a subsystem wants to navigate the > hierarchy, it must do so under the protection of the subsystem > mutex. Without proper locking, if a userdata item is added or removed concurrently while these functions are iterating, the list can be accessed in an inconsistent state. For example, the list_for_each() loop can reach a node that is being removed from the list by list_del_init() which sets the node's .next pointer to point to itself, so the loop will never end (or reach the WARN_ON_ONCE in update_userdata()). Fix this by holding the configfs subsystem mutex (su_mutex) during all operations that iterate over cg_children. This includes: - userdatum_value_store() which calls update_userdata() to iterate over cg_children - All sysdata_*_enabled_store() functions which call count_extradata_entries() to iterate over cg_children The su_mutex must be acquired before dynamic_netconsole_mutex to avoid potential lock ordering issues, as configfs operations may already hold su_mutex when calling into our code. 
Fixes: df03f830d099 ("net: netconsole: cache userdata formatted string in netconsole_target") Signed-off-by: Gustavo Luiz Duarte Link: https://patch.msgid.link/20251029-netconsole-fix-warn-v1-1-0d0dd4622f48@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 5d8d0214786c7..bb6e03a929565 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -936,6 +936,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); @@ -949,6 +950,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, ret = count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -974,6 +976,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_MSGID); if (msgid_enabled == curr) @@ -994,6 +997,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1008,6 +1012,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); if (release_enabled == curr) @@ -1028,6 +1033,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } 
@@ -1042,6 +1048,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME); if (taskname_enabled == curr) @@ -1062,6 +1069,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1077,6 +1085,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR); if (cpu_nr_enabled == curr) @@ -1105,6 +1114,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } From 4d07797faaa19aa8e80e10a04ca1a72c643ef5cf Mon Sep 17 00:00:00 2001 From: Thomas Wismer Date: Wed, 29 Oct 2025 22:23:09 +0100 Subject: [PATCH 455/867] net: pse-pd: tps23881: Add support for TPS23881B The TPS23881B uses different firmware than the TPS23881. Trying to load the TPS23881 firmware on a TPS23881B device fails and must be omitted. The TPS23881B ships with a more recent ROM firmware. Moreover, no updated firmware has been released yet and so the firmware loading step must be skipped. As of today, the TPS23881B is intended to use its ROM firmware. 
Signed-off-by: Thomas Wismer Reviewed-by: Kory Maincent Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20251029212312.108749-2-thomas@wismer.xyz Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/tps23881.c | 69 +++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index b724b222ab44c..76ec1555d60df 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -55,8 +55,6 @@ #define TPS23881_REG_TPON BIT(0) #define TPS23881_REG_FWREV 0x41 #define TPS23881_REG_DEVID 0x43 -#define TPS23881_REG_DEVID_MASK 0xF0 -#define TPS23881_DEVICE_ID 0x02 #define TPS23881_REG_CHAN1_CLASS 0x4c #define TPS23881_REG_SRAM_CTRL 0x60 #define TPS23881_REG_SRAM_DATA 0x61 @@ -1012,8 +1010,28 @@ static const struct pse_controller_ops tps23881_ops = { .pi_get_pw_req = tps23881_pi_get_pw_req, }; -static const char fw_parity_name[] = "ti/tps23881/tps23881-parity-14.bin"; -static const char fw_sram_name[] = "ti/tps23881/tps23881-sram-14.bin"; +struct tps23881_info { + u8 dev_id; /* device ID and silicon revision */ + const char *fw_parity_name; /* parity code firmware file name */ + const char *fw_sram_name; /* SRAM code firmware file name */ +}; + +enum tps23881_model { + TPS23881, + TPS23881B, +}; + +static const struct tps23881_info tps23881_info[] = { + [TPS23881] = { + .dev_id = 0x22, + .fw_parity_name = "ti/tps23881/tps23881-parity-14.bin", + .fw_sram_name = "ti/tps23881/tps23881-sram-14.bin", + }, + [TPS23881B] = { + .dev_id = 0x24, + /* skip SRAM load, ROM provides Clause 145 hardware-level support */ + }, +}; struct tps23881_fw_conf { u8 reg; @@ -1085,16 +1103,17 @@ static int tps23881_flash_sram_fw_part(struct i2c_client *client, return ret; } -static int tps23881_flash_sram_fw(struct i2c_client *client) +static int tps23881_flash_sram_fw(struct i2c_client *client, + const struct tps23881_info *info) { int ret; - ret = 
tps23881_flash_sram_fw_part(client, fw_parity_name, + ret = tps23881_flash_sram_fw_part(client, info->fw_parity_name, tps23881_fw_parity_conf); if (ret) return ret; - ret = tps23881_flash_sram_fw_part(client, fw_sram_name, + ret = tps23881_flash_sram_fw_part(client, info->fw_sram_name, tps23881_fw_sram_conf); if (ret) return ret; @@ -1412,6 +1431,7 @@ static int tps23881_setup_irq(struct tps23881_priv *priv, int irq) static int tps23881_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; + const struct tps23881_info *info; struct tps23881_priv *priv; struct gpio_desc *reset; int ret; @@ -1422,6 +1442,10 @@ static int tps23881_i2c_probe(struct i2c_client *client) return -ENXIO; } + info = i2c_get_match_data(client); + if (!info) + return -EINVAL; + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -1440,7 +1464,7 @@ static int tps23881_i2c_probe(struct i2c_client *client) * to Load TPS2388x SRAM and Parity Code over I2C" (Rev E)) * indicates we should delay that programming by at least 50ms. So * we'll wait the entire 50ms here to ensure we're safe to go to the - * SRAM loading proceedure. + * SRAM loading procedure. */ msleep(50); } @@ -1449,20 +1473,27 @@ static int tps23881_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; - if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { + if (ret != info->dev_id) { dev_err(dev, "Wrong device ID\n"); return -ENXIO; } - ret = tps23881_flash_sram_fw(client); - if (ret < 0) - return ret; + if (info->fw_sram_name) { + ret = tps23881_flash_sram_fw(client, info); + if (ret < 0) + return ret; + } ret = i2c_smbus_read_byte_data(client, TPS23881_REG_FWREV); if (ret < 0) return ret; - dev_info(&client->dev, "Firmware revision 0x%x\n", ret); + if (ret == 0xFF) { + dev_err(&client->dev, "Device entered safe mode\n"); + return -ENXIO; + } + dev_info(&client->dev, "Firmware revision 0x%x%s\n", ret, + ret == 0x00 ? 
" (ROM firmware)" : ""); /* Set configuration B, 16 bit access on a single device address */ ret = i2c_smbus_read_byte_data(client, TPS23881_REG_GEN_MASK); @@ -1498,13 +1529,21 @@ static int tps23881_i2c_probe(struct i2c_client *client) } static const struct i2c_device_id tps23881_id[] = { - { "tps23881" }, + { "tps23881", .driver_data = (kernel_ulong_t)&tps23881_info[TPS23881] }, + { "tps23881b", .driver_data = (kernel_ulong_t)&tps23881_info[TPS23881B] }, { } }; MODULE_DEVICE_TABLE(i2c, tps23881_id); static const struct of_device_id tps23881_of_match[] = { - { .compatible = "ti,tps23881", }, + { + .compatible = "ti,tps23881", + .data = &tps23881_info[TPS23881] + }, + { + .compatible = "ti,tps23881b", + .data = &tps23881_info[TPS23881B] + }, { }, }; MODULE_DEVICE_TABLE(of, tps23881_of_match); From 32032eb166a6d05d6bb4803c9b9e39659990b18a Mon Sep 17 00:00:00 2001 From: Thomas Wismer Date: Wed, 29 Oct 2025 22:23:10 +0100 Subject: [PATCH 456/867] dt-bindings: pse-pd: ti,tps23881: Add TPS23881B Add the TPS23881B I2C power sourcing equipment controller to the list of supported devices. Falling back to the TPS23881 predecessor device is not suitable as firmware loading needs to handled differently by the driver. The TPS23881 and TPS23881B devices require different firmware. Trying to load the TPS23881 firmware on a TPS23881B device fails and must therefore be omitted. 
Signed-off-by: Thomas Wismer Acked-by: Conor Dooley Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20251029212312.108749-3-thomas@wismer.xyz Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml index bb1ee33986555..0b3803f647b7b 100644 --- a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml +++ b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml @@ -16,6 +16,7 @@ properties: compatible: enum: - ti,tps23881 + - ti,tps23881b reg: maxItems: 1 From 30176bf7c871681df506f3165ffe76ec462db991 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 29 Oct 2025 16:32:06 +0100 Subject: [PATCH 457/867] dpll: add phase-adjust-gran pin attribute Phase-adjust values are currently limited by a min-max range. Some hardware requires, for certain pin types, that values be multiples of a specific granularity, as in the zl3073x driver. Add a `phase-adjust-gran` pin attribute and an appropriate field in dpll_pin_properties. If set by the driver, use its value to validate user-provided phase-adjust values. 
Reviewed-by: Michal Schmidt Reviewed-by: Petr Oros Tested-by: Prathosh Satish Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Reviewed-by: Arkadiusz Kubalewski Link: https://patch.msgid.link/20251029153207.178448-2-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/driver-api/dpll.rst | 36 +++++++++++++++------------ Documentation/netlink/specs/dpll.yaml | 7 ++++++ drivers/dpll/dpll_netlink.c | 12 ++++++++- include/linux/dpll.h | 1 + include/uapi/linux/dpll.h | 1 + 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst index be1fc643b645e..83118c728ed90 100644 --- a/Documentation/driver-api/dpll.rst +++ b/Documentation/driver-api/dpll.rst @@ -198,26 +198,28 @@ be requested with the same attribute with ``DPLL_CMD_DEVICE_SET`` command. ================================== ====================================== Device may also provide ability to adjust a signal phase on a pin. -If pin phase adjustment is supported, minimal and maximal values that pin -handle shall be provide to the user on ``DPLL_CMD_PIN_GET`` respond -with ``DPLL_A_PIN_PHASE_ADJUST_MIN`` and ``DPLL_A_PIN_PHASE_ADJUST_MAX`` +If pin phase adjustment is supported, minimal and maximal values and +granularity that pin handle shall be provided to the user on +``DPLL_CMD_PIN_GET`` respond with ``DPLL_A_PIN_PHASE_ADJUST_MIN``, +``DPLL_A_PIN_PHASE_ADJUST_MAX`` and ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attributes. Configured phase adjust value is provided with ``DPLL_A_PIN_PHASE_ADJUST`` attribute of a pin, and value change can be requested with the same attribute with ``DPLL_CMD_PIN_SET`` command. 
- =============================== ====================================== - ``DPLL_A_PIN_ID`` configured pin id - ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment - ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment - ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase - adjustment on parent dpll device - ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting - configuration on given parent dpll - device - ``DPLL_A_PIN_PARENT_ID`` parent dpll device id - ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference - between a pin and parent dpll device - =============================== ====================================== + ================================ ========================================== + ``DPLL_A_PIN_ID`` configured pin id + ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attr granularity of phase adjustment value + ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment + ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment + ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase + adjustment on parent dpll device + ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting + configuration on given parent dpll + device + ``DPLL_A_PIN_PARENT_ID`` parent dpll device id + ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference + between a pin and parent dpll device + ================================ ========================================== All phase related values are provided in pico seconds, which represents time difference between signals phase. The negative value means that @@ -384,6 +386,8 @@ according to attribute purpose. 
frequencies ``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency ``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency + ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attr granularity of phase + adjustment value ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 80728f6f9bc87..78d0724d7e12c 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -440,6 +440,12 @@ attribute-sets: doc: | Capable pin provides list of pins that can be bound to create a reference-sync pin pair. + - + name: phase-adjust-gran + type: u32 + doc: | + Granularity of phase adjustment, in picoseconds. The value of + phase adjustment must be a multiple of this granularity. - name: pin-parent-device @@ -616,6 +622,7 @@ operations: - capabilities - parent-device - parent-pin + - phase-adjust-gran - phase-adjust-min - phase-adjust-max - phase-adjust diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index a4153bcb6dcfe..64944f601ee5a 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -637,6 +637,10 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, ret = dpll_msg_add_pin_freq(msg, pin, ref, extack); if (ret) return ret; + if (prop->phase_gran && + nla_put_u32(msg, DPLL_A_PIN_PHASE_ADJUST_GRAN, + prop->phase_gran)) + return -EMSGSIZE; if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST_MIN, prop->phase_range.min)) return -EMSGSIZE; @@ -1261,7 +1265,13 @@ dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr, if (phase_adj > pin->prop.phase_range.max || phase_adj < pin->prop.phase_range.min) { NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr, - "phase adjust value not supported"); + "phase adjust value of out range"); + return -EINVAL; + } + if (pin->prop.phase_gran && phase_adj % 
(s32)pin->prop.phase_gran) { + NL_SET_ERR_MSG_ATTR_FMT(extack, phase_adj_attr, + "phase adjust value not multiple of %u", + pin->prop.phase_gran); return -EINVAL; } diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 25be745bf41f1..562f520b23c27 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -163,6 +163,7 @@ struct dpll_pin_properties { u32 freq_supported_num; struct dpll_pin_frequency *freq_supported; struct dpll_pin_phase_adjust_range phase_range; + u32 phase_gran; }; #if IS_ENABLED(CONFIG_DPLL) diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index ab1725a954d74..69d35570ac4f1 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -251,6 +251,7 @@ enum dpll_a_pin { DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED, DPLL_A_PIN_ESYNC_PULSE, DPLL_A_PIN_REFERENCE_SYNC, + DPLL_A_PIN_PHASE_ADJUST_GRAN, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) From 055a01b29fd643e33b9b1e88e24bbe1afe6fc6d9 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 29 Oct 2025 16:32:07 +0100 Subject: [PATCH 458/867] dpll: zl3073x: Specify phase adjustment granularity for pins Output pins phase adjustment values in the device are expressed in half synth clock cycles. Use this number of cycles as output pins' phase adjust granularity and simplify both get/set callbacks. 
Reviewed-by: Michal Schmidt Reviewed-by: Petr Oros Tested-by: Prathosh Satish Signed-off-by: Ivan Vecera Reviewed-by: Arkadiusz Kubalewski Link: https://patch.msgid.link/20251029153207.178448-3-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 58 +++++++++---------------------------- drivers/dpll/zl3073x/prop.c | 11 +++++++ 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index f93f9a4583243..d90150671d374 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -35,6 +35,7 @@ * @prio: pin priority <0, 14> * @selectable: pin is selectable in automatic mode * @esync_control: embedded sync is controllable + * @phase_gran: phase adjustment granularity * @pin_state: last saved pin state * @phase_offset: last saved pin phase offset * @freq_offset: last saved fractional frequency offset @@ -49,6 +50,7 @@ struct zl3073x_dpll_pin { u8 prio; bool selectable; bool esync_control; + s32 phase_gran; enum dpll_pin_state pin_state; s64 phase_offset; s64 freq_offset; @@ -1388,25 +1390,14 @@ zl3073x_dpll_output_pin_phase_adjust_get(const struct dpll_pin *dpll_pin, struct zl3073x_dpll *zldpll = dpll_priv; struct zl3073x_dev *zldev = zldpll->dev; struct zl3073x_dpll_pin *pin = pin_priv; - u32 synth_freq; s32 phase_comp; - u8 out, synth; + u8 out; int rc; - out = zl3073x_output_pin_out_get(pin->id); - synth = zl3073x_out_synth_get(zldev, out); - synth_freq = zl3073x_synth_freq_get(zldev, synth); - - /* Check synth freq for zero */ - if (!synth_freq) { - dev_err(zldev->dev, "Got zero synth frequency for output %u\n", - out); - return -EINVAL; - } - guard(mutex)(&zldev->multiop_lock); /* Read output configuration */ + out = zl3073x_output_pin_out_get(pin->id); rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD, ZL_REG_OUTPUT_MB_MASK, BIT(out)); if (rc) @@ -1417,11 +1408,10 @@ zl3073x_dpll_output_pin_phase_adjust_get(const struct dpll_pin 
*dpll_pin, if (rc) return rc; - /* Value in register is expressed in half synth clock cycles */ - phase_comp *= (int)div_u64(PSEC_PER_SEC, 2 * synth_freq); - - /* Reverse two's complement negation applied during 'set' */ - *phase_adjust = -phase_comp; + /* Convert value to ps and reverse two's complement negation applied + * during 'set' + */ + *phase_adjust = -phase_comp * pin->phase_gran; return rc; } @@ -1437,39 +1427,18 @@ zl3073x_dpll_output_pin_phase_adjust_set(const struct dpll_pin *dpll_pin, struct zl3073x_dpll *zldpll = dpll_priv; struct zl3073x_dev *zldev = zldpll->dev; struct zl3073x_dpll_pin *pin = pin_priv; - int half_synth_cycle; - u32 synth_freq; - u8 out, synth; + u8 out; int rc; - /* Get attached synth */ - out = zl3073x_output_pin_out_get(pin->id); - synth = zl3073x_out_synth_get(zldev, out); - - /* Get synth's frequency */ - synth_freq = zl3073x_synth_freq_get(zldev, synth); - - /* Value in register is expressed in half synth clock cycles so - * the given phase adjustment a multiple of half synth clock. - */ - half_synth_cycle = (int)div_u64(PSEC_PER_SEC, 2 * synth_freq); - - if ((phase_adjust % half_synth_cycle) != 0) { - NL_SET_ERR_MSG_FMT(extack, - "Phase adjustment value has to be multiple of %d", - half_synth_cycle); - return -EINVAL; - } - phase_adjust /= half_synth_cycle; - /* The value in the register is stored as two's complement negation - * of requested value. + * of requested value and expressed in half synth clock cycles. 
*/ - phase_adjust = -phase_adjust; + phase_adjust = -phase_adjust / pin->phase_gran; guard(mutex)(&zldev->multiop_lock); /* Read output configuration */ + out = zl3073x_output_pin_out_get(pin->id); rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD, ZL_REG_OUTPUT_MB_MASK, BIT(out)); if (rc) @@ -1758,9 +1727,10 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index) if (IS_ERR(props)) return PTR_ERR(props); - /* Save package label & esync capability */ + /* Save package label, esync capability and phase adjust granularity */ strscpy(pin->label, props->package_label); pin->esync_control = props->esync_control; + pin->phase_gran = props->dpll_props.phase_gran; if (zl3073x_dpll_is_input_pin(pin)) { rc = zl3073x_dpll_ref_prio_get(pin, &pin->prio); diff --git a/drivers/dpll/zl3073x/prop.c b/drivers/dpll/zl3073x/prop.c index 4cf7e8aefcb37..9e1fca5cdaf1e 100644 --- a/drivers/dpll/zl3073x/prop.c +++ b/drivers/dpll/zl3073x/prop.c @@ -208,7 +208,18 @@ struct zl3073x_pin_props *zl3073x_pin_props_get(struct zl3073x_dev *zldev, DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE | DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; } else { + u8 out, synth; + u32 f; + props->dpll_props.type = DPLL_PIN_TYPE_GNSS; + + /* The output pin phase adjustment granularity equals half of + * the synth frequency count. + */ + out = zl3073x_output_pin_out_get(index); + synth = zl3073x_out_synth_get(zldev, out); + f = 2 * zl3073x_synth_freq_get(zldev, synth); + props->dpll_props.phase_gran = f ? div_u64(PSEC_PER_SEC, f) : 1; } props->dpll_props.phase_range.min = S32_MIN; From 01cc760632b875c4ad0d8fec0b0c01896b8a36d4 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 28 Oct 2025 08:44:52 +0700 Subject: [PATCH 459/867] Documentation: ARCnet: Update obsolete contact info ARCnet docs states that inquiries on the subsystem should be emailed to Avery Pennarun , for whom has been in CREDITS since the beginning of kernel git history and her email address is unreachable (bounce). 
The subsystem is now maintained by Michael Grzeschik since c38f6ac74c9980 ("MAINTAINERS: add arcnet and take maintainership"). In addition, there used to be a dedicated ARCnet mailing list but its archive at epistolary.org has been shut down. ARCnet discussion nowadays take place in netdev list. The arcnet.com domain mentioned has become AIoT (Artificial Intelligence of Things) related Typeform page and ARCnet info now resides on arcnet.cc (ARCnet Resource Center) instead. Update contact information. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20251028014451.10521-2-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/arcnet-hardware.rst | 22 ++++----- Documentation/networking/arcnet.rst | 48 +++++--------------- 2 files changed, 21 insertions(+), 49 deletions(-) diff --git a/Documentation/networking/arcnet-hardware.rst b/Documentation/networking/arcnet-hardware.rst index 3bf7f99cd7bbf..20e5075d0d0e7 100644 --- a/Documentation/networking/arcnet-hardware.rst +++ b/Documentation/networking/arcnet-hardware.rst @@ -4,18 +4,20 @@ ARCnet Hardware =============== +:Author: Avery Pennarun + .. note:: - 1) This file is a supplement to arcnet.txt. Please read that for general + 1) This file is a supplement to arcnet.rst. Please read that for general driver configuration help. 2) This file is no longer Linux-specific. It should probably be moved out of the kernel sources. Ideas? Because so many people (myself included) seem to have obtained ARCnet cards without manuals, this file contains a quick introduction to ARCnet hardware, -some cabling tips, and a listing of all jumper settings I can find. Please -e-mail apenwarr@worldvisions.ca with any settings for your particular card, -or any other information you have! +some cabling tips, and a listing of all jumper settings I can find. 
If you +have any settings for your particular card, and/or any other information you +have, do not hesitate to :ref:`email to netdev <arcnet-netdev>`. Introduction to ARCnet @@ -72,11 +74,10 @@ level of encapsulation is defined by RFC1201, which I call "packet splitting," that allows "virtual packets" to grow as large as 64K each, although they are generally kept down to the Ethernet-style 1500 bytes. -For more information on the advantages and disadvantages (mostly the -advantages) of ARCnet networks, you might try the "ARCnet Trade Association" -WWW page: +For more information on ARCnet networks, visit the "ARCNET Resource Center" +WWW page at: - http://www.arcnet.com + https://www.arcnet.cc Cabling ARCnet Networks @@ -3226,9 +3227,6 @@ Settings for IRQ Selection (Lower Jumper Line) Other Cards =========== -I have no information on other models of ARCnet cards at the moment. Please -send any and all info to: - - apenwarr@worldvisions.ca +I have no information on other models of ARCnet cards at the moment. Thanks. diff --git a/Documentation/networking/arcnet.rst b/Documentation/networking/arcnet.rst index 82fce606c0f0b..cd43a18ad1494 100644 --- a/Documentation/networking/arcnet.rst +++ b/Documentation/networking/arcnet.rst @@ -4,6 +4,8 @@ ARCnet ====== +:Author: Avery Pennarun + .. note:: See also arcnet-hardware.txt in this directory for jumper-setting @@ -30,18 +32,7 @@ Come on, be a sport! Send me a success report! (hey, that was even better than my original poem... this is getting bad!) - -.. warning:: - - If you don't e-mail me about your success/failure soon, I may be forced to - start SINGING. And we don't want that, do we? - - (You know, it might be argued that I'm pushing this point a little too much. - If you think so, why not flame me in a quick little e-mail? Please also - include the type of card(s) you're using, software, size of network, and - whether it's working or not.)
- - My e-mail address is: apenwarr@worldvisions.ca +---- These are the ARCnet drivers for Linux. @@ -59,23 +50,14 @@ ARCnet 2.10 ALPHA, Tomasz's all-new-and-improved RFC1051 support has been included and seems to be working fine! +.. _arcnet-netdev: + Where do I discuss these drivers? --------------------------------- -Tomasz has been so kind as to set up a new and improved mailing list. -Subscribe by sending a message with the BODY "subscribe linux-arcnet YOUR -REAL NAME" to listserv@tichy.ch.uj.edu.pl. Then, to submit messages to the -list, mail to linux-arcnet@tichy.ch.uj.edu.pl. - -There are archives of the mailing list at: - - http://epistolary.org/mailman/listinfo.cgi/arcnet - -The people on linux-net@vger.kernel.org (now defunct, replaced by -netdev@vger.kernel.org) have also been known to be very helpful, especially -when we're talking about ALPHA Linux kernels that may or may not work right -in the first place. - +ARCnet discussions take place on netdev. Simply send your email to +netdev@vger.kernel.org and make sure to Cc: maintainer listed in +"ARCNET NETWORK LAYER" heading of Documentation/process/maintainers.rst. Other Drivers and Info ---------------------- @@ -523,17 +505,9 @@ can set up your network then: It works: what now? ------------------- -Send mail describing your setup, preferably including driver version, kernel -version, ARCnet card model, CPU type, number of systems on your network, and -list of software in use to me at the following address: - - apenwarr@worldvisions.ca - -I do send (sometimes automated) replies to all messages I receive. My email -can be weird (and also usually gets forwarded all over the place along the -way to me), so if you don't get a reply within a reasonable time, please -resend. - +Send mail following :ref:`arcnet-netdev`. Describe your setup, preferably +including driver version, kernel version, ARCnet card model, CPU type, number +of systems on your network, and list of software in use. 
It doesn't work: what now? -------------------------- From 320258783765316d2baae99c26e461ee634054fe Mon Sep 17 00:00:00 2001 From: Jay Bhat Date: Thu, 30 Oct 2025 21:17:25 -0500 Subject: [PATCH 460/867] RDMA/irdma: Fix vf_id size to u16 to avoid overflow Correctly size the vf_id to u16 to avoid overflow. Signed-off-by: Jay Bhat Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20251031021726.1003-6-tatyana.e.nikolova@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 4ae77cdde9dc7..c1b8f81ea283d 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -706,7 +706,7 @@ struct irdma_sc_dev { u32 vchnl_ver; u16 num_vfs; u16 hmc_fn_id; - u8 vf_id; + u16 vf_id; bool privileged:1; bool vchnl_up:1; bool ceq_valid:1; From b8126205dbe01e22b0d10c8be132bb53bf3399c1 Mon Sep 17 00:00:00 2001 From: Krzysztof Czurylo Date: Thu, 30 Oct 2025 21:17:20 -0500 Subject: [PATCH 461/867] MAINTAINERS: Update irdma maintainers Adds Krzysztof Czurylo as co-maintainer for irdma driver. 
Signed-off-by: Krzysztof Czurylo Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968e..8861469749e4a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12515,6 +12515,7 @@ F: include/linux/avf/virtchnl.h F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA +M: Krzysztof Czurylo M: Tatyana Nikolova L: linux-rdma@vger.kernel.org S: Supported From ff4d2ef3874773c9c6173b0f099372bf62252aaf Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 29 Oct 2025 08:14:06 +0100 Subject: [PATCH 462/867] rust: devres: fix private intra-doc link The future move of pin-init to `syn` uncovers the following private intra-doc link: error: public documentation for `Devres` links to private item `Self::inner` --> rust/kernel/devres.rs:106:7 | 106 | /// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only. | ^^^^^^^^^^^ this item is private | = note: this link will resolve properly if you pass `--document-private-items` = note: `-D rustdoc::private-intra-doc-links` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(rustdoc::private_intra_doc_links)]` Currently, when rendered, the link points to "nowhere" (an inexistent anchor for a "method"). Thus fix it. Cc: stable@vger.kernel.org Fixes: f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc") Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251029071406.324511-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/devres.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 10a6a17898541..2392c281459ef 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -103,7 +103,7 @@ struct Inner { /// /// # Invariants /// -/// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only. 
+/// `Self::inner` is guaranteed to be initialized and is always accessed read-only. #[pin_data(PinnedDrop)] pub struct Devres { dev: ARef, From 09b1704f5b02c18dd02b21343530463fcfc92c54 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 29 Oct 2025 08:33:44 +0100 Subject: [PATCH 463/867] rust: condvar: fix broken intra-doc link The future move of pin-init to `syn` uncovers the following broken intra-doc link: error: unresolved link to `crate::pin_init` --> rust/kernel/sync/condvar.rs:39:40 | 39 | /// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros. | ^^^^^^^^^^^^^^^^ no item named `pin_init` in module `kernel` | = note: `-D rustdoc::broken-intra-doc-links` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(rustdoc::broken_intra_doc_links)]` Currently, when rendered, the link points to a literal `crate::pin_init!` URL. Thus fix it. Cc: stable@vger.kernel.org Fixes: 129e97be8e28 ("rust: pin-init: fix documentation links") Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251029073344.349341-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/sync/condvar.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index c6ec64295c9ff..aa5b9a7a726dd 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -36,7 +36,7 @@ pub use new_condvar; /// spuriously. /// /// Instances of [`CondVar`] need a lock class and to be pinned. The recommended way to create such -/// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros. +/// instances is with the [`pin_init`](pin_init::pin_init!) and [`new_condvar`] macros. 
/// /// # Examples /// From 16c43a56b79e2c3220b043236369a129d508c65a Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Nov 2025 22:28:52 +0100 Subject: [PATCH 464/867] rust: kbuild: treat `build_error` and `rustdoc` as kernel objects Even if normally `build_error` isn't a kernel object, it should still be treated as such so that we pass the same flags. Similarly, `rustdoc` targets are never kernel objects, but we need to treat them as such. Otherwise, starting with Rust 1.91.0 (released 2025-10-30), `rustc` will complain about missing sanitizer flags since `-Zsanitizer` is a target modifier too [1]: error: mixing `-Zsanitizer` will cause an ABI mismatch in crate `build_error` --> rust/build_error.rs:3:1 | 3 | //! Build-time error. | ^ | = help: the `-Zsanitizer` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely = note: unset `-Zsanitizer` in this crate is incompatible with `-Zsanitizer=kernel-address` in dependency `core` = help: set `-Zsanitizer=kernel-address` in this crate or unset `-Zsanitizer` in `core` = help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=sanitizer` to silence this error Thus explicitly mark them as kernel objects. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/pull/138736 [1] Reviewed-by: Alice Ryhl Tested-by: Justin M. 
Forbes Link: https://patch.msgid.link/20251102212853.1505384-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rust/Makefile b/rust/Makefile index 23c7ae905bd2f..a9fb9354b6593 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -127,9 +127,14 @@ rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) +# Even if `rustdoc` targets are not kernel objects, they should still be +# treated as such so that we pass the same flags. Otherwise, for instance, +# `rustdoc` will complain about missing sanitizer flags causing an ABI mismatch. +rustdoc-compiler_builtins: private is-kernel-object := y rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE +$(call if_changed,rustdoc) +rustdoc-ffi: private is-kernel-object := y rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE +$(call if_changed,rustdoc) @@ -147,6 +152,7 @@ rustdoc-pin_init: $(src)/pin-init/src/lib.rs rustdoc-pin_init_internal \ rustdoc-macros FORCE +$(call if_changed,rustdoc) +rustdoc-kernel: private is-kernel-object := y rustdoc-kernel: private rustc_target_flags = --extern ffi --extern pin_init \ --extern build_error --extern macros \ --extern bindings --extern uapi @@ -522,6 +528,10 @@ $(obj)/pin_init.o: $(src)/pin-init/src/lib.rs $(obj)/compiler_builtins.o \ $(obj)/$(libpin_init_internal_name) $(obj)/$(libmacros_name) FORCE +$(call if_changed_rule,rustc_library) +# Even if normally `build_error` is not a kernel object, it should still be +# treated as such so that we pass the same flags. Otherwise, for instance, +# `rustc` will complain about missing sanitizer flags causing an ABI mismatch. 
+$(obj)/build_error.o: private is-kernel-object := y $(obj)/build_error.o: private skip_gendwarfksyms = 1 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) From fad472efab0a805dd939f017c5b8669a786a4bcf Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Nov 2025 22:28:53 +0100 Subject: [PATCH 465/867] rust: kbuild: workaround `rustdoc` doctests modifier bug The `rustdoc` modifiers bug [1] was fixed in Rust 1.90.0 [2], for which we added a workaround in commit abbf9a449441 ("rust: workaround `rustdoc` target modifiers bug"). However, `rustdoc`'s doctest generation still has a similar issue [3], being fixed at [4], which does not affect us because we apply the workaround to both, and now, starting with Rust 1.91.0 (released 2025-10-30), `-Zsanitizer` is a target modifier too [5], which means we fail with: RUSTDOC TK rust/kernel/lib.rs error: mixing `-Zsanitizer` will cause an ABI mismatch in crate `kernel` --> rust/kernel/lib.rs:3:1 | 3 | //! The `kernel` crate. | ^ | = help: the `-Zsanitizer` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely = note: unset `-Zsanitizer` in this crate is incompatible with `-Zsanitizer=kernel-address` in dependency `core` = help: set `-Zsanitizer=kernel-address` in this crate or unset `-Zsanitizer` in `core` = help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=sanitizer` to silence this error A simple way around is to add the sanitizer to the list in the existing workaround (especially if we had not started to pass the sanitizer flags in the previous commit, since in that case that would not be necessary). However, that still applies the workaround in more cases than necessary. Instead, only modify the doctests flags to ignore the check for sanitizers, so that it is more local (and thus the compiler keeps checking it for us in the normal `rustdoc` calls). 
Since the previous commit already treated the `rustdoc` calls as kernel objects, this should allow us in the future to easily remove this workaround when the time comes. By the way, the `-Cunsafe-allow-abi-mismatch` flag overwrites previous ones rather than appending, so it needs to be all done in the same flag. Moreover, unknown modifiers are rejected, and thus we have to gate based on the version too. Finally, `-Zsanitizer-cfi-normalize-integers` is not affected (in Rust 1.91.0), so it is not needed in the workaround for the moment. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/issues/144521 [1] Link: https://github.com/rust-lang/rust/pull/144523 [2] Link: https://github.com/rust-lang/rust/issues/146465 [3] Link: https://github.com/rust-lang/rust/pull/148068 [4] Link: https://github.com/rust-lang/rust/pull/138736 [5] Reviewed-by: Alice Ryhl Tested-by: Justin M. Forbes Link: https://patch.msgid.link/20251102212853.1505384-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index a9fb9354b6593..3e545c1a0ff40 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -69,6 +69,9 @@ core-edition := $(if $(call rustc-min-version,108700),2024,2021) # the time being (https://github.com/rust-lang/rust/issues/144521). rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) +# Similarly, for doctests (https://github.com/rust-lang/rust/issues/146465). +doctests_modifiers_workaround := $(rustdoc_modifiers_workaround)$(if $(call rustc-min-version,109100),$(comma)sanitizer) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). 
Thus workaround both @@ -236,7 +239,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< --extern bindings --extern uapi \ --no-run --crate-name kernel -Zunstable-options \ --sysroot=/dev/null \ - $(rustdoc_modifiers_workaround) \ + $(doctests_modifiers_workaround) \ --test-builder $(objtree)/scripts/rustdoc_test_builder \ $< $(rustdoc_test_kernel_quiet); \ $(objtree)/scripts/rustdoc_test_gen From b2b526c2cf57d14ee269e012ed179081871f45a1 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 31 Oct 2025 09:15:53 -0700 Subject: [PATCH 466/867] net: mdio: Check regmap pointer returned by device_node_to_regmap() The call to device_node_to_regmap() in airoha_mdio_probe() can return an ERR_PTR() if regmap initialization fails. Currently, the driver stores the pointer without validation, which could lead to a crash if it is later dereferenced. Add an IS_ERR() check and return the corresponding error code to make the probe path more robust. Fixes: 67e3ba978361 ("net: mdio: Add MDIO bus controller for Airoha AN7583") Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251031161607.58581-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-airoha.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mdio/mdio-airoha.c b/drivers/net/mdio/mdio-airoha.c index 1dc9939c8d7d4..52e7475121eaf 100644 --- a/drivers/net/mdio/mdio-airoha.c +++ b/drivers/net/mdio/mdio-airoha.c @@ -219,6 +219,8 @@ static int airoha_mdio_probe(struct platform_device *pdev) priv = bus->priv; priv->base_addr = addr; priv->regmap = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); priv->clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(priv->clk)) From acbf1d0a9aeb1035e74c3750a6b31db0b7b69ed4 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 31 Oct 2025 04:26:44 -0700 Subject: [PATCH 467/867] hinic3: fix misleading error message in hinic3_open_channel() The error message printed 
when hinic3_configure() fails incorrectly reports "Failed to init txrxq irq", which does not match the actual operation performed. The hinic3_configure() function sets up various device resources such as MTU and RSS parameters; it does not perform IRQ initialization. Update the log to "Failed to configure device resources" to make the message accurate and clearer for debugging. Signed-off-by: Alok Tiwari Reviewed-by: Fan Gong Link: https://patch.msgid.link/20251031112654.46187-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c index 0fa3c79002251..bbf22811a029d 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c @@ -304,7 +304,7 @@ static int hinic3_open_channel(struct net_device *netdev) err = hinic3_configure(netdev); if (err) { - netdev_err(netdev, "Failed to init txrxq irq\n"); + netdev_err(netdev, "Failed to configure device resources\n"); goto err_uninit_qps_irq; } From 7ed8b63ddc9a9578eae81f4da32761568a25efad Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 11:39:54 +0100 Subject: [PATCH 468/867] MAINTAINERS: add brcm tag driver to b53 The b53 entry was missing the brcm tag driver, so add it.
Reported-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20251029181216.3f35f8ba@kernel.org/ Signed-off-by: Jonas Gorski Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101103954.29816-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0518f1f4f3b56..92e9cd1a363bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4818,6 +4818,7 @@ F: drivers/net/dsa/b53/* F: drivers/net/dsa/bcm_sf2* F: include/linux/dsa/brcm.h F: include/linux/platform_data/b53.h +F: net/dsa/tag_brcm.c BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE M: Florian Fainelli From b6a8a5477fe9bd6be2b594a88f82f8bba41e6d54 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 14:28:06 +0100 Subject: [PATCH 469/867] net: dsa: b53: fix resetting speed and pause on forced link There is no guarantee that the port state override registers have their default values, as not all switches support being reset via register or have a reset GPIO. So when forcing port config, we need to make sure to clear all fields, which we currently do not do for the speed and flow control configuration. This can cause flow control stay enabled, or in the case of speed becoming an illegal value, e.g. configured for 1G (0x2), then setting 100M (0x1), results in 0x3 which is invalid. For PORT_OVERRIDE_SPEED_2000M we need to make sure to only clear it on supported chips, as the bit can have different meanings on other chips, e.g. for BCM5389 this controls scanning PHYs for link/speed configuration. 
Fixes: 5e004460f874 ("net: dsa: b53: Add helper to set link parameters") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101132807.50419-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 2f846381d5a76..cb28256ef3cc3 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1372,6 +1372,10 @@ static void b53_force_port_config(struct b53_device *dev, int port, else reg &= ~PORT_OVERRIDE_FULL_DUPLEX; + reg &= ~(0x3 << GMII_PO_SPEED_S); + if (is5301x(dev) || is58xx(dev)) + reg &= ~PORT_OVERRIDE_SPEED_2000M; + switch (speed) { case 2000: reg |= PORT_OVERRIDE_SPEED_2000M; @@ -1390,6 +1394,11 @@ static void b53_force_port_config(struct b53_device *dev, int port, return; } + if (is5325(dev)) + reg &= ~PORT_OVERRIDE_LP_FLOW_25; + else + reg &= ~(PORT_OVERRIDE_RX_FLOW | PORT_OVERRIDE_TX_FLOW); + if (rx_pause) { if (is5325(dev)) reg |= PORT_OVERRIDE_LP_FLOW_25; From 3e4ebdc1606adf77744cf8ed7a433d279fdc57ba Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 14:28:07 +0100 Subject: [PATCH 470/867] net: dsa: b53: fix bcm63xx RGMII port link adjustment BCM63XX's switch does not support MDIO scanning of external phys, so its MACs needs to be manually configured for autonegotiated link speeds. So b53_force_port_config() and b53_force_link() accordingly also when mode is MLO_AN_PHY for those ports. Fixes lower speeds than 1000/full on rgmii ports 4 - 7. This aligns the behaviour with the old bcm63xx_enetsw driver for those ports. 
Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101132807.50419-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index cb28256ef3cc3..bb2c6dfa7835d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1602,8 +1602,11 @@ static void b53_phylink_mac_link_down(struct phylink_config *config, struct b53_device *dev = dp->ds->priv; int port = dp->index; - if (mode == MLO_AN_PHY) + if (mode == MLO_AN_PHY) { + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) + b53_force_link(dev, port, false); return; + } if (mode == MLO_AN_FIXED) { b53_force_link(dev, port, false); @@ -1631,6 +1634,13 @@ static void b53_phylink_mac_link_up(struct phylink_config *config, if (mode == MLO_AN_PHY) { /* Re-negotiate EEE if it was enabled already */ p->eee_enabled = b53_eee_init(ds, port, phydev); + + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) { + b53_force_port_config(dev, port, speed, duplex, + tx_pause, rx_pause); + b53_force_link(dev, port, true); + } + return; } From c264294624e956a967a9e2e5fa41e3273340b089 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:56 +0100 Subject: [PATCH 471/867] net: dsa: b53: fix enabling ip multicast In the New Control register bit 1 is either reserved, or has a different function: Out of Range Error Discard When enabled, the ingress port discards any frames if the Length field is between 1500 and 1536 (excluding 1500 and 1536) and with good CRC. The actual bit for enabling IP multicast is bit 0, which was only explicitly enabled for BCM5325 so far. 
For older switch chips, this bit defaults to 0, so we want to enable it as well, while newer switch chips default to 1, and their documentation says "It is illegal to set this bit to zero." So drop the wrong B53_IPMC_FWD_EN define, enable the IP multicast bit also for other switch chips. While at it, rename it to (B53_)IP_MC as that is how it is called in Broadcom code. Fixes: 63cc54a6f073 ("net: dsa: b53: Fix egress flooding settings") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 4 ++-- drivers/net/dsa/b53/b53_regs.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bb2c6dfa7835d..58c31049c0e7a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -371,11 +371,11 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) * frames should be flooded or not. 
*/ b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IPMC_FWD_EN; + mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } else { b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_IP_MCAST_25; + mgmt |= B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } } diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 309fe0e46dadf..8ce1ce72e9385 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -111,8 +111,7 @@ /* IP Multicast control (8 bit) */ #define B53_IP_MULTICAST_CTRL 0x21 -#define B53_IP_MCAST_25 BIT(0) -#define B53_IPMC_FWD_EN BIT(1) +#define B53_IP_MC BIT(0) #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) From 0be04b5fa62a82a9929ca261f6c9f64a3d0a28da Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:57 +0100 Subject: [PATCH 472/867] net: dsa: b53: stop reading ARL entries if search is done The switch clears the ARL_SRCH_STDN bit when the search is done, i.e. it finished traversing the ARL table. This means that there will be no valid result, so we should not attempt to read and process any further entries. We only ever check the validity of the entries for 4 ARL bin chips, and only after having passed the first entry to the b53_fdb_copy(). This means that we always pass an invalid entry at the end to the b53_fdb_copy(). b53_fdb_copy() does check the validity though before passing on the entry, so it never gets passed on. On < 4 ARL bin chips, we will even continue reading invalid entries until we reach the result limit. 
Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 58c31049c0e7a..b467500699c70 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2037,7 +2037,7 @@ static int b53_arl_search_wait(struct b53_device *dev) do { b53_read8(dev, B53_ARLIO_PAGE, offset, &reg); if (!(reg & ARL_SRCH_STDN)) - return 0; + return -ENOENT; if (reg & ARL_SRCH_VLID) return 0; From e57723fe536f040cc2635ec1545dd0a7919a321e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:58 +0100 Subject: [PATCH 473/867] net: dsa: b53: properly bound ARL searches for < 4 ARL bin chips When iterating over the ARL table we stop at max ARL entries / 2, but this is only valid if the chip actually returns 2 results at once. For chips with only one result register we will stop before reaching the end of the table if it is more than half full. Fix this by only dividing the maximum results by two if we have a chip with more than one result register (i.e. those with 4 ARL bins).
Fixes: cd169d799bee ("net: dsa: b53: Bound check ARL searches") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b467500699c70..eb767edc4c135 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2087,13 +2087,16 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, int b53_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { + unsigned int count = 0, results_per_hit = 1; struct b53_device *priv = ds->priv; struct b53_arl_entry results[2]; - unsigned int count = 0; u8 offset; int ret; u8 reg; + if (priv->num_arl_bins > 2) + results_per_hit = 2; + mutex_lock(&priv->arl_mutex); if (is5325(priv) || is5365(priv)) @@ -2115,7 +2118,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, if (ret) break; - if (priv->num_arl_bins > 2) { + if (results_per_hit == 2) { b53_arl_search_rd(priv, 1, &results[1]); ret = b53_fdb_copy(port, &results[1], cb, data); if (ret) @@ -2125,7 +2128,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, break; } - } while (count++ < b53_max_arl_entries(priv) / 2); + } while (count++ < b53_max_arl_entries(priv) / results_per_hit); mutex_unlock(&priv->arl_mutex); From 18aa36238a4d835c1644dcccd63d32c7fdd4b310 Mon Sep 17 00:00:00 2001 From: Jianhui Zhao Date: Sun, 2 Nov 2025 16:26:37 +0100 Subject: [PATCH 474/867] net: phy: realtek: add interrupt support for RTL8221B This commit introduces interrupt support for RTL8221B (C45 mode). Interrupts are mapped on the VEND2 page. VEND2 registers are only accessible via C45 reads and cannot be accessed by C45 over C22. 
Signed-off-by: Jianhui Zhao [Enable only link state change interrupts] Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251102152644.1676482-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 1fd4b6cf5c1e1..417f9a88aab66 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -128,6 +128,11 @@ */ #define RTL822X_VND2_C22_REG(reg) (0xa400 + 2 * (reg)) +#define RTL8221B_VND2_INER 0xa4d2 +#define RTL8221B_VND2_INER_LINK_STATUS BIT(4) + +#define RTL8221B_VND2_INSR 0xa4d4 + #define RTL8224_MII_RTCT 0x11 #define RTL8224_MII_RTCT_ENABLE BIT(0) #define RTL8224_MII_RTCT_PAIR_A BIT(4) @@ -1880,6 +1885,53 @@ static irqreturn_t rtl9000a_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } +static int rtl8221b_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL8221B_VND2_INSR); + + return (err < 0) ? 
err : 0; +} + +static int rtl8221b_config_intr(struct phy_device *phydev) +{ + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = rtl8221b_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write_mmd(phydev, MDIO_MMD_VEND2, RTL8221B_VND2_INER, + RTL8221B_VND2_INER_LINK_STATUS); + } else { + err = phy_write_mmd(phydev, MDIO_MMD_VEND2, + RTL8221B_VND2_INER, 0); + if (err) + return err; + + err = rtl8221b_ack_interrupt(phydev); + } + + return err; +} + +static irqreturn_t rtl8221b_handle_interrupt(struct phy_device *phydev) +{ + int err; + + err = rtl8221b_ack_interrupt(phydev); + if (err) { + phy_error(phydev); + return IRQ_NONE; + } + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static struct phy_driver realtek_drvs[] = { { PHY_ID_MATCH_EXACT(0x00008201), @@ -2054,6 +2106,8 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8221b_vb_cg_c45_match_phy_device, .name = "RTL8221B-VB-CG 2.5Gbps PHY (C45)", + .config_intr = rtl8221b_config_intr, + .handle_interrupt = rtl8221b_handle_interrupt, .probe = rtl822x_probe, .config_init = rtl822xb_config_init, .get_rate_matching = rtl822xb_get_rate_matching, @@ -2078,6 +2132,8 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8221b_vm_cg_c45_match_phy_device, .name = "RTL8221B-VM-CG 2.5Gbps PHY (C45)", + .config_intr = rtl8221b_config_intr, + .handle_interrupt = rtl8221b_handle_interrupt, .probe = rtl822x_probe, .config_init = rtl822xb_config_init, .get_rate_matching = rtl822xb_get_rate_matching, From 22795871edea35dfaf63011782ea77a3f440a655 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 1 Nov 2025 18:34:46 +0000 Subject: [PATCH 475/867] net: dsa: yt921x: Fix spelling mistake "stucked" -> "stuck" There is a spelling mistake in a dev_err message. Fix it. 
Signed-off-by: Colin Ian King Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251101183446.32134-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/yt921x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/yt921x.c b/drivers/net/dsa/yt921x.c index ab762ffc46611..944988e29127d 100644 --- a/drivers/net/dsa/yt921x.c +++ b/drivers/net/dsa/yt921x.c @@ -1131,7 +1131,7 @@ static int yt921x_fdb_wait(struct yt921x_priv *priv, u32 *valp) res = yt921x_reg_wait(priv, YT921X_FDB_RESULT, YT921X_FDB_RESULT_DONE, &val); if (res) { - dev_err(dev, "FDB probably stucked\n"); + dev_err(dev, "FDB probably stuck\n"); return res; } From c8732e933925e188cbb1d9bc3a5e1ae9affe6869 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 31 Oct 2025 13:16:28 +0100 Subject: [PATCH 476/867] net: phy: micrel: lan8842 errata Add errata for lan8842. The errata document can be found here [1]. This is fixing the module 2 ("Analog front-end not optimized for PHY-side shorted center taps"). 
[1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/LAN8842-Errata-DS80001172.pdf Fixes: 5a774b64cd6a ("net: phy: micrel: Add support for lan8842") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 147 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 604b5de0c1581..1fa56d4c17937 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -466,6 +466,12 @@ struct lan8842_priv { u16 rev; }; +struct lanphy_reg_data { + int page; + u16 addr; + u16 val; +}; + static const struct kszphy_type lan8814_type = { .led_mode_reg = ~LAN8814_LED_CTRL_1, .cable_diag_reg = LAN8814_CABLE_DIAG, @@ -2835,6 +2841,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, */ #define LAN8814_PAGE_PCS_DIGITAL 2 +/** + * LAN8814_PAGE_EEE - Selects Extended Page 3. + * + * This page contains EEE registers + */ +#define LAN8814_PAGE_EEE 3 + /** * LAN8814_PAGE_COMMON_REGS - Selects Extended Page 4. * @@ -2853,6 +2866,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, */ #define LAN8814_PAGE_PORT_REGS 5 +/** + * LAN8814_PAGE_POWER_REGS - Selects Extended Page 28. + * + * This page contains analog control registers and power mode registers. + */ +#define LAN8814_PAGE_POWER_REGS 28 + /** * LAN8814_PAGE_SYSTEM_CTRL - Selects Extended Page 31. 
* @@ -5884,6 +5904,128 @@ static int lan8842_probe(struct phy_device *phydev) return 0; } +#define LAN8814_POWER_MGMT_MODE_3_ANEG_MDI 0x13 +#define LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX 0x14 +#define LAN8814_POWER_MGMT_MODE_5_10BT_MDI 0x15 +#define LAN8814_POWER_MGMT_MODE_6_10BT_MDIX 0x16 +#define LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN 0x17 +#define LAN8814_POWER_MGMT_MODE_8_100BT_MDI 0x18 +#define LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX 0x19 +#define LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX 0x1a +#define LAN8814_POWER_MGMT_MODE_11_100BT_MDIX 0x1b +#define LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX 0x1c +#define LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX 0x1d +#define LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX 0x1e + +#define LAN8814_POWER_MGMT_DLLPD_D BIT(0) +#define LAN8814_POWER_MGMT_ADCPD_D BIT(1) +#define LAN8814_POWER_MGMT_PGAPD_D BIT(2) +#define LAN8814_POWER_MGMT_TXPD_D BIT(3) +#define LAN8814_POWER_MGMT_DLLPD_C BIT(4) +#define LAN8814_POWER_MGMT_ADCPD_C BIT(5) +#define LAN8814_POWER_MGMT_PGAPD_C BIT(6) +#define LAN8814_POWER_MGMT_TXPD_C BIT(7) +#define LAN8814_POWER_MGMT_DLLPD_B BIT(8) +#define LAN8814_POWER_MGMT_ADCPD_B BIT(9) +#define LAN8814_POWER_MGMT_PGAPD_B BIT(10) +#define LAN8814_POWER_MGMT_TXPD_B BIT(11) +#define LAN8814_POWER_MGMT_DLLPD_A BIT(12) +#define LAN8814_POWER_MGMT_ADCPD_A BIT(13) +#define LAN8814_POWER_MGMT_PGAPD_A BIT(14) +#define LAN8814_POWER_MGMT_TXPD_A BIT(15) + +#define LAN8814_POWER_MGMT_C_D (LAN8814_POWER_MGMT_DLLPD_D | \ + LAN8814_POWER_MGMT_ADCPD_D | \ + LAN8814_POWER_MGMT_PGAPD_D | \ + LAN8814_POWER_MGMT_DLLPD_C | \ + LAN8814_POWER_MGMT_ADCPD_C | \ + LAN8814_POWER_MGMT_PGAPD_C) + +#define LAN8814_POWER_MGMT_B_C_D (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B) + +#define LAN8814_POWER_MGMT_VAL1 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + 
LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL2 LAN8814_POWER_MGMT_C_D + +#define LAN8814_POWER_MGMT_VAL3 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL4 (LAN8814_POWER_MGMT_B_C_D | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D + +static const struct lanphy_reg_data short_center_tap_errata[] = { + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_5_10BT_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_6_10BT_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN, + LAN8814_POWER_MGMT_VAL2 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_8_100BT_MDI, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_11_100BT_MDIX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX, + LAN8814_POWER_MGMT_VAL4 }, +}; + +static int lanphy_write_reg_data(struct phy_device *phydev, + const struct lanphy_reg_data *data, + size_t num) +{ + int ret = 0; + + while (num--) { + ret = lanphy_write_page_reg(phydev, data->page, data->addr, + data->val); + if (ret) + break; + } + + return ret; +} + +static int 
lan8842_erratas(struct phy_device *phydev) +{ + return lanphy_write_reg_data(phydev, short_center_tap_errata, + ARRAY_SIZE(short_center_tap_errata)); +} + static int lan8842_config_init(struct phy_device *phydev) { int ret; @@ -5896,6 +6038,11 @@ static int lan8842_config_init(struct phy_device *phydev) if (ret < 0) return ret; + /* Apply the erratas for this device */ + ret = lan8842_erratas(phydev); + if (ret < 0) + return ret; + /* Even if the GPIOs are set to control the LEDs the behaviour of the * LEDs is wrong, they are not blinking when there is traffic. * To fix this it is required to set extended LED mode From 65bd9a262644b67490dddd93a2b842ac94ccbe36 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 31 Oct 2025 13:16:29 +0100 Subject: [PATCH 477/867] net: phy: micrel: lan8842 errata Add errata for lan8842. The errata document can be found here [1]. This is fixing the module 7 ("1000BASE-T PMA EEE TX wake timer is non-compliant") [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/LAN8842-Errata-DS80001172.pdf Fixes: 5a774b64cd6a ("net: phy: micrel: Add support for lan8842") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1fa56d4c17937..6a1a424e3b30f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -5965,6 +5965,9 @@ static int lan8842_probe(struct phy_device *phydev) #define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D +#define LAN8814_EEE_WAKE_TX_TIMER 0x0e +#define LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL 0x1f + static const struct lanphy_reg_data short_center_tap_errata[] = { { LAN8814_PAGE_POWER_REGS, LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, @@ -6004,6 +6007,12 @@ static const struct lanphy_reg_data short_center_tap_errata[] = { LAN8814_POWER_MGMT_VAL4 
}, }; +static const struct lanphy_reg_data waketx_timer_errata[] = { + { LAN8814_PAGE_EEE, + LAN8814_EEE_WAKE_TX_TIMER, + LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL }, +}; + static int lanphy_write_reg_data(struct phy_device *phydev, const struct lanphy_reg_data *data, size_t num) @@ -6022,8 +6031,15 @@ static int lanphy_write_reg_data(struct phy_device *phydev, static int lan8842_erratas(struct phy_device *phydev) { - return lanphy_write_reg_data(phydev, short_center_tap_errata, + int ret; + + ret = lanphy_write_reg_data(phydev, short_center_tap_errata, ARRAY_SIZE(short_center_tap_errata)); + if (ret) + return ret; + + return lanphy_write_reg_data(phydev, waketx_timer_errata, + ARRAY_SIZE(waketx_timer_errata)); } static int lan8842_config_init(struct phy_device *phydev) From 209ff7af79bf495e6c3d300bf3dea6aeea973bc7 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 30 Oct 2025 19:24:53 +0100 Subject: [PATCH 478/867] net: stmmac: rename devlink parameter ts_coarse into phc_coarse_adj The devlink param "ts_coarse" doesn't indicate that we get coarse timestamps, but rather that the PHC clock adjustments are coarse as the frequency won't be continuously adjusted. Adjust the devlink parameter name to reflect that. The Coarse terminology comes from the dwmac register naming, update the documentation to better explain what the parameter is about.
With this change, the parameter can now be adjusted using: devlink dev param set name phc_coarse_adj value true cmode runtime Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20251030182454.182406-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/stmmac.rst | 21 +++++++++++++------ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/devlink/stmmac.rst b/Documentation/networking/devlink/stmmac.rst index e8e33d1c7baf1..47e3ff10bc082 100644 --- a/Documentation/networking/devlink/stmmac.rst +++ b/Documentation/networking/devlink/stmmac.rst @@ -19,13 +19,22 @@ The ``stmmac`` driver implements the following driver-specific parameters. - Type - Mode - Description - * - ``ts_coarse`` + * - ``phc_coarse_adj`` - Boolean - runtime - - Enable the Coarse timestamping mode. In Coarse mode, the ptp clock is - expected to be updated through an external PPS input, but the subsecond - increment used for timestamping is set to 1/ptp_clock_rate. In Fine mode - (i.e. Coarse mode == false), the ptp clock frequency is adjusted more - frequently, but the subsecond increment is set to 2/ptp_clock_rate. + - Enable the Coarse timestamping mode, as defined in the DWMAC TRM. + A detailed explanation of this timestamping mode can be found in the + Socfpga Functionnal Description [1]. + + In Coarse mode, the ptp clock is expected to be fed by a high-precision + clock that is externally adjusted, and the subsecond increment used for + timestamping is set to 1/ptp_clock_rate. + + In Fine mode (i.e. Coarse mode == false), the ptp clock frequency is + continuously adjusted, but the subsecond increment is set to + 2/ptp_clock_rate. + Coarse mode is suitable for PTP Grand Master operation. If unsure, leave the parameter to False. 
+ + [1] https://www.intel.com/content/www/us/en/docs/programmable/683126/21-2/functional-description-of-the-emac.html diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1e69c1a7dc6c5..c2a783c8022d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7445,7 +7445,7 @@ static int stmmac_dl_ts_coarse_get(struct devlink *dl, u32 id, } static const struct devlink_param stmmac_devlink_params[] = { - DEVLINK_PARAM_DRIVER(STMMAC_DEVLINK_PARAM_ID_TS_COARSE, "ts_coarse", + DEVLINK_PARAM_DRIVER(STMMAC_DEVLINK_PARAM_ID_TS_COARSE, "phc_coarse_adj", DEVLINK_PARAM_TYPE_BOOL, BIT(DEVLINK_PARAM_CMODE_RUNTIME), stmmac_dl_ts_coarse_get, From 38f50242bf0f237cdc262308d624d333286ec3c5 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:26 +0100 Subject: [PATCH 479/867] sctp: Hold RCU read lock while iterating over address list With CONFIG_PROVE_RCU_LIST=y and by executing $ netcat -l --sctp & $ netcat --sctp localhost & $ ss --sctp one can trigger the following Lockdep-RCU splat(s): WARNING: suspicious RCU usage 6.18.0-rc1-00093-g7f864458e9a6 #5 Not tainted ----------------------------- net/sctp/diag.c:76 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by ss/215: #0: ffff9c740828bec0 (nlk_cb_mutex-SOCK_DIAG){+.+.}-{4:4}, at: __netlink_dump_start+0x84/0x2b0 #1: ffff9c7401d72cd0 (sk_lock-AF_INET6){+.+.}-{0:0}, at: sctp_sock_dump+0x38/0x200 stack backtrace: CPU: 0 UID: 0 PID: 215 Comm: ss Not tainted 6.18.0-rc1-00093-g7f864458e9a6 #5 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x90 lockdep_rcu_suspicious.cold+0x4e/0xa3 inet_sctp_diag_fill.isra.0+0x4b1/0x5d0 sctp_sock_dump+0x131/0x200 sctp_transport_traverse_process+0x170/0x1b0 ? __pfx_sctp_sock_filter+0x10/0x10 ? __pfx_sctp_sock_dump+0x10/0x10 sctp_diag_dump+0x103/0x140 __inet_diag_dump+0x70/0xb0 netlink_dump+0x148/0x490 __netlink_dump_start+0x1f3/0x2b0 inet_diag_handler_cmd+0xcd/0x100 ? __pfx_inet_diag_dump_start+0x10/0x10 ? __pfx_inet_diag_dump+0x10/0x10 ? __pfx_inet_diag_dump_done+0x10/0x10 sock_diag_rcv_msg+0x18e/0x320 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x4d/0x100 netlink_unicast+0x1d7/0x2b0 netlink_sendmsg+0x203/0x450 ____sys_sendmsg+0x30c/0x340 ___sys_sendmsg+0x94/0xf0 __sys_sendmsg+0x83/0xf0 do_syscall_64+0xbb/0x390 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... 
Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Reviewed-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-2-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 996c2018f0e61..1a8761f87bf16 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -73,19 +73,23 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, struct nlattr *attr; void *info = NULL; + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) addrcnt++; + rcu_read_unlock(); attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); if (!attr) return -EMSGSIZE; info = nla_data(attr); + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) { memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } + rcu_read_unlock(); return 0; } From 95aef86ab231f047bb8085c70666059b58f53c09 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:27 +0100 Subject: [PATCH 480/867] sctp: Prevent TOCTOU out-of-bounds write For the following path not holding the sock lock, sctp_diag_dump() -> sctp_for_each_endpoint() -> sctp_ep_dump() make sure not to exceed bounds in case the address list has grown between buffer allocation (time-of-check) and write (time-of-use). 
Suggested-by: Kuniyuki Iwashima Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Reviewed-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-3-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 1a8761f87bf16..5d64dd99ca9a3 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -88,6 +88,9 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; + + if (!--addrcnt) + break; } rcu_read_unlock(); From f1fc201148c7e684c10a72b6a3375597f28d1ef6 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:28 +0100 Subject: [PATCH 481/867] sctp: Hold sock lock while iterating over address list Move address list traversal in inet_assoc_attr_size() under the sock lock to avoid holding the RCU read lock. 
Suggested-by: Xin Long Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-4-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 5d64dd99ca9a3..2afb376299fe4 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -230,14 +230,15 @@ struct sctp_comm_param { bool net_admin; }; -static size_t inet_assoc_attr_size(struct sctp_association *asoc) +static size_t inet_assoc_attr_size(struct sock *sk, + struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct sctp_sockaddr_entry *laddr; list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, - list) + list, lockdep_sock_is_held(sk)) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) @@ -263,11 +264,14 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t if (err) return err; - rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); - if (!rep) + lock_sock(sk); + + rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); + if (!rep) { + release_sock(sk); return -ENOMEM; + } - lock_sock(sk); if (ep != assoc->ep) { err = -EAGAIN; goto out; From d261f5b09c28850dc63ca1d3018596f829f402d5 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Fri, 31 Oct 2025 17:52:02 +0200 Subject: [PATCH 482/867] net: ionic: add dma_wmb() before ringing TX doorbell The TX path currently writes descriptors and then immediately writes to the MMIO doorbell register to notify the NIC. On weakly ordered architectures, descriptor writes may still be pending in CPU or DMA write buffers when the doorbell is issued, leading to the device fetching stale or incomplete descriptors. Add a dma_wmb() in ionic_txq_post() to ensure all descriptor writes are visible to the device before the doorbell MMIO write. 
Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Mohammad Heib Link: https://patch.msgid.link/20251031155203.203031-1-mheib@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index d10b58ebf6034..2e571d0a0d8a2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -29,6 +29,10 @@ static void ionic_tx_clean(struct ionic_queue *q, static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell) { + /* Ensure TX descriptor writes reach memory before NIC reads them. + * Prevents device from fetching stale descriptors. + */ + dma_wmb(); ionic_q_post(q, ring_dbell); } From de0337d641bfa5b6d6b489e479792f1039274e84 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Fri, 31 Oct 2025 17:52:03 +0200 Subject: [PATCH 483/867] net: ionic: map SKB after pseudo-header checksum prep The TSO path called ionic_tx_map_skb() before preparing the TCP pseudo checksum (ionic_tx_tcp_[inner_]pseudo_csum()), which may perform skb_cow_head() and might modify bytes in the linear header area. Mapping first and then mutating the header risks: - Using a stale DMA address if skb_cow_head() relocates the head, and/or - Device reading stale header bytes on weakly-ordered systems (CPU writes after mapping are not guaranteed visible without an explicit dma_sync_single_for_device()). Reorder the TX path to perform all header mutations (including skb_cow_head()) *before* DMA mapping. Mapping is now done only after the skb layout and header contents are final. This removes the need for any post-mapping dma_sync and prevents on-wire corruption observed under VLAN+TSO load after repeated runs. This change is purely an ordering fix; no functional behavior change otherwise.
Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Mohammad Heib Reviewed-by: Brett Creeley Link: https://patch.msgid.link/20251031155203.203031-2-mheib@redhat.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2e571d0a0d8a2..301ebee2fdc50 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -1448,19 +1448,6 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, bool encap; int err; - desc_info = &q->tx_info[q->head_idx]; - - if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) - return -EIO; - - len = skb->len; - mss = skb_shinfo(skb)->gso_size; - outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPXIP4 | - SKB_GSO_IPXIP6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM)); has_vlan = !!skb_vlan_tag_present(skb); vlan_tci = skb_vlan_tag_get(skb); encap = skb->encapsulation; @@ -1474,12 +1461,21 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, err = ionic_tx_tcp_inner_pseudo_csum(skb); else err = ionic_tx_tcp_pseudo_csum(skb); - if (unlikely(err)) { - /* clean up mapping from ionic_tx_map_skb */ - ionic_tx_desc_unmap_bufs(q, desc_info); + if (unlikely(err)) return err; - } + desc_info = &q->tx_info[q->head_idx]; + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; + mss = skb_shinfo(skb)->gso_size; + outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPXIP4 | + SKB_GSO_IPXIP6 | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)); if (encap) hdrlen = skb_inner_tcp_all_headers(skb); else From 2e25935ed24daee37c4c2e8e29e478ce6e1f72c7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Nov 2025 16:26:42 
+0300 Subject: [PATCH 484/867] octeontx2-pf: Fix devm_kcalloc() error checking The devm_kcalloc() function never returns error pointers, it returns NULL on failure. Also delete the netdev_err() printk. These allocation functions already have debug output built-in, so the extra error message is not required. Fixes: efabce290151 ("octeontx2-pf: AF_XDP zero copy receive support") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQYKkrGA12REb2sj@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index aff17c37ddde0..902d6abaa3ec1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1516,10 +1516,8 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->xdp_cnt = numptrs; pool->xdp = devm_kcalloc(pfvf->dev, numptrs, sizeof(struct xdp_buff *), GFP_KERNEL); - if (IS_ERR(pool->xdp)) { - netdev_err(pfvf->netdev, "Creation of xsk pool failed\n"); - return PTR_ERR(pool->xdp); - } + if (!pool->xdp) + return -ENOMEM; } return 0; From 5556f23478e6eb5d6a0321d4135e2c37a3c78a1e Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 3 Nov 2025 10:02:49 +0800 Subject: [PATCH 485/867] net: spacemit: Check netif_running() in emac_set_pauseparam() Currently, emac_set_pauseparam() will oops if userspace calls it while the interface is not up, because phydev is NULL, but it is still accessed in emac_set_fc() and emac_set_fc_autoneg(). Check for netif_running(dev) in emac_set_pauseparam() before proceeding.
Fixes: bfec6d7f2001 ("net: spacemit: Add K1 Ethernet MAC") Signed-off-by: Vivian Wang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251103-k1-ethernet-remove-fc-v3-1-2083770cd282@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/spacemit/k1_emac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index e1c5faff3b71c..220eb5ce75833 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -1441,6 +1441,9 @@ static int emac_set_pauseparam(struct net_device *dev, struct emac_priv *priv = netdev_priv(dev); u8 fc = 0; + if (!netif_running(dev)) + return -ENETDOWN; + priv->flow_control_autoneg = pause->autoneg; if (pause->autoneg) { From 59b20b15c112867f28a12a24aa25f14549db02e4 Mon Sep 17 00:00:00 2001 From: Huiwen He Date: Mon, 3 Nov 2025 10:36:19 +0800 Subject: [PATCH 486/867] sctp: make sctp_transport_init() void sctp_transport_init() is static and never returns NULL. It is only called by sctp_transport_new(), so change it to void and remove the redundant return value check. Signed-off-by: Huiwen He Acked-by: Xin Long Link: https://patch.msgid.link/20251103023619.1025622-1-hehuiwen@kylinos.cn Signed-off-by: Jakub Kicinski --- net/sctp/transport.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4d258a6e8033c..0d48c61fe6ade 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -37,10 +37,10 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct net *net, - struct sctp_transport *peer, - const union sctp_addr *addr, - gfp_t gfp) +static void sctp_transport_init(struct net *net, + struct sctp_transport *peer, + const union sctp_addr *addr, + gfp_t gfp) { /* Copy in the address. 
*/ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); @@ -83,8 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct net *net, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); - - return peer; } /* Allocate and initialize a new transport. */ @@ -96,20 +94,13 @@ struct sctp_transport *sctp_transport_new(struct net *net, transport = kzalloc(sizeof(*transport), gfp); if (!transport) - goto fail; + return NULL; - if (!sctp_transport_init(net, transport, addr, gfp)) - goto fail_init; + sctp_transport_init(net, transport, addr, gfp); SCTP_DBG_OBJCNT_INC(transport); return transport; - -fail_init: - kfree(transport); - -fail: - return NULL; } /* This transport is no longer needed. Free up if possible, or From e120f46768d98151ece8756ebd688b0e43dc8b29 Mon Sep 17 00:00:00 2001 From: Qendrim Maxhuni Date: Wed, 29 Oct 2025 08:57:44 +0100 Subject: [PATCH 487/867] net: usb: qmi_wwan: initialize MAC header offset in qmimux_rx_fixup Raw IP packets have no MAC header, leaving skb->mac_header uninitialized. This can trigger kernel panics on ARM64 when xfrm or other subsystems access the offset due to strict alignment checks. Initialize the MAC header to prevent such crashes. This can trigger kernel panics on ARM when running IPsec over the qmimux0 interface. 
Example trace: Internal error: Oops: 000000009600004f [#1] SMP CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.34-gbe78e49cb433 #1 Hardware name: LS1028A RDB Board (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : xfrm_input+0xde8/0x1318 lr : xfrm_input+0x61c/0x1318 sp : ffff800080003b20 Call trace: xfrm_input+0xde8/0x1318 xfrm6_rcv+0x38/0x44 xfrm6_esp_rcv+0x48/0xa8 ip6_protocol_deliver_rcu+0x94/0x4b0 ip6_input_finish+0x44/0x70 ip6_input+0x44/0xc0 ipv6_rcv+0x6c/0x114 __netif_receive_skb_one_core+0x5c/0x8c __netif_receive_skb+0x18/0x60 process_backlog+0x78/0x17c __napi_poll+0x38/0x180 net_rx_action+0x168/0x2f0 Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Signed-off-by: Qendrim Maxhuni Link: https://patch.msgid.link/20251029075744.105113-1-qendrim.maxhuni@garderos.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 11352d85475ae..3a4985b582cb1 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -192,6 +192,12 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (!skbn) return 0; + /* Raw IP packets don't have a MAC header, but other subsystems + * (like xfrm) may still access MAC header offsets, so they must + * be initialized. + */ + skb_reset_mac_header(skbn); + switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: skbn->protocol = htons(ETH_P_IP); From 2214ca1ff6df1c1faab4fb95f0296b6d9bf6e1ee Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:53 +0000 Subject: [PATCH 488/867] mpls: Return early in mpls_label_ok(). When mpls_label_ok() returns false, it does not need to update *index. Let's remove is_ok and return early. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 25c88cba5c48b..e3533d85d3725 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -940,24 +940,23 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, static bool mpls_label_ok(struct net *net, unsigned int *index, struct netlink_ext_ack *extack) { - bool is_ok = true; - /* Reserved labels may not be set */ if (*index < MPLS_LABEL_FIRST_UNRESERVED) { NL_SET_ERR_MSG(extack, "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher"); - is_ok = false; + return false; } /* The full 20 bit range may not be supported. */ - if (is_ok && *index >= net->mpls.platform_labels) { + if (*index >= net->mpls.platform_labels) { NL_SET_ERR_MSG(extack, "Label >= configured maximum in platform_labels"); - is_ok = false; + return false; } *index = array_index_nospec(*index, net->mpls.platform_labels); - return is_ok; + + return true; } static int mpls_route_add(struct mpls_route_config *cfg, From f0914b8436c589b7ab32c614d8d7868eb4ebd5bf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:54 +0000 Subject: [PATCH 489/867] mpls: Hold dev refcnt for mpls_nh. MPLS uses RTNL 1) to guarantee the lifetime of struct mpls_nh.nh_dev 2) to protect net->mpls.platform_label , but neither actually requires RTNL. If we do not call dev_put() in find_outdev() and call it just before freeing struct mpls_route, we can drop RTNL for 1). Let's hold the refcnt of mpls_nh.nh_dev and track it with netdevice_tracker. Two notable changes: If mpls_nh_build_multi() fails to set up a neighbour, we need to call netdev_put() for successfully created neighbours in mpls_rt_free_rcu(), so the number of neighbours (rt->rt_nhn) is now updated in each iteration. 
When a dev is unregistered, mpls_ifdown() clones mpls_route and replaces it with the clone, so the clone requires extra netdev_hold(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 63 +++++++++++++++++++++++++++++++-------------- net/mpls/internal.h | 1 + 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index e3533d85d3725..e7be874668098 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -530,10 +530,23 @@ static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels) return rt; } +static void mpls_rt_free_rcu(struct rcu_head *head) +{ + struct mpls_route *rt; + + rt = container_of(head, struct mpls_route, rt_rcu); + + change_nexthops(rt) { + netdev_put(nh->nh_dev, &nh->nh_dev_tracker); + } endfor_nexthops(rt); + + kfree(rt); +} + static void mpls_rt_free(struct mpls_route *rt) { if (rt) - kfree_rcu(rt, rt_rcu); + call_rcu(&rt->rt_rcu, mpls_rt_free_rcu); } static void mpls_notify_route(struct net *net, unsigned index, @@ -587,6 +600,7 @@ static unsigned find_free_label(struct net *net) #if IS_ENABLED(CONFIG_INET) static struct net_device *inet_fib_lookup_dev(struct net *net, + struct mpls_nh *nh, const void *addr) { struct net_device *dev; @@ -599,14 +613,14 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, return ERR_CAST(rt); dev = rt->dst.dev; - dev_hold(dev); - + netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL); ip_rt_put(rt); return dev; } #else static struct net_device *inet_fib_lookup_dev(struct net *net, + struct mpls_nh *nh, const void *addr) { return ERR_PTR(-EAFNOSUPPORT); @@ -615,6 +629,7 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, #if IS_ENABLED(CONFIG_IPV6) static struct net_device *inet6_fib_lookup_dev(struct net *net, + struct mpls_nh *nh, const void *addr) { struct net_device *dev; @@ 
-631,13 +646,14 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, return ERR_CAST(dst); dev = dst->dev; - dev_hold(dev); + netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL); dst_release(dst); return dev; } #else static struct net_device *inet6_fib_lookup_dev(struct net *net, + struct mpls_nh *nh, const void *addr) { return ERR_PTR(-EAFNOSUPPORT); @@ -653,16 +669,17 @@ static struct net_device *find_outdev(struct net *net, if (!oif) { switch (nh->nh_via_table) { case NEIGH_ARP_TABLE: - dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh)); + dev = inet_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh)); break; case NEIGH_ND_TABLE: - dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh)); + dev = inet6_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh)); break; case NEIGH_LINK_TABLE: break; } } else { - dev = dev_get_by_index(net, oif); + dev = netdev_get_by_index(net, oif, + &nh->nh_dev_tracker, GFP_KERNEL); } if (!dev) @@ -671,8 +688,7 @@ static struct net_device *find_outdev(struct net *net, if (IS_ERR(dev)) return dev; - /* The caller is holding rtnl anyways, so release the dev reference */ - dev_put(dev); + nh->nh_dev = dev; return dev; } @@ -686,20 +702,17 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, dev = find_outdev(net, rt, nh, oif); if (IS_ERR(dev)) { err = PTR_ERR(dev); - dev = NULL; goto errout; } /* Ensure this is a supported device */ err = -EINVAL; if (!mpls_dev_get(dev)) - goto errout; + goto errout_put; if ((nh->nh_via_table == NEIGH_LINK_TABLE) && (dev->addr_len != nh->nh_via_alen)) - goto errout; - - nh->nh_dev = dev; + goto errout_put; if (!(dev->flags & IFF_UP)) { nh->nh_flags |= RTNH_F_DEAD; @@ -713,6 +726,9 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, return 0; +errout_put: + netdev_put(nh->nh_dev, &nh->nh_dev_tracker); + nh->nh_dev = NULL; errout: return err; } @@ -890,7 +906,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, struct nlattr *nla_via, *nla_newdst; int 
remaining = cfg->rc_mp_len; int err = 0; - u8 nhs = 0; + + rt->rt_nhn = 0; change_nexthops(rt) { int attrlen; @@ -926,11 +943,9 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, rt->rt_nhn_alive--; rtnh = rtnh_next(rtnh, &remaining); - nhs++; + rt->rt_nhn++; } endfor_nexthops(rt); - rt->rt_nhn = nhs; - return 0; errout: @@ -1523,8 +1538,12 @@ static int mpls_ifdown(struct net_device *dev, int event) change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; - if (nh->nh_dev != dev) + if (nh->nh_dev != dev) { + if (nh_del) + netdev_hold(nh->nh_dev, &nh->nh_dev_tracker, + GFP_KERNEL); goto next; + } switch (event) { case NETDEV_DOWN: @@ -2518,10 +2537,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; + rt0 = mpls_rt_alloc(1, lo->addr_len, 0); if (IS_ERR(rt0)) goto nort0; + rt0->rt_nh->nh_dev = lo; + netdev_hold(lo, &rt0->rt_nh->nh_dev_tracker, GFP_KERNEL); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; @@ -2532,10 +2554,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; + rt2 = mpls_rt_alloc(1, lo->addr_len, 0); if (IS_ERR(rt2)) goto nort2; + rt2->rt_nh->nh_dev = lo; + netdev_hold(lo, &rt2->rt_nh->nh_dev_tracker, GFP_KERNEL); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 83c629529b575..3a5feca27d6af 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -88,6 +88,7 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device *nh_dev; + netdevice_tracker nh_dev_tracker; /* nh_flags is accessed under RCU in the packet path; it is * modified handling 
netdev events with rtnl lock held From 451c538ec067e84c1bf1c2b99ebc2b1ca0a09090 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:55 +0000 Subject: [PATCH 490/867] mpls: Unify return paths in mpls_dev_notify(). We will protect net->mpls.platform_label by a dedicated mutex. Then, we need to wrap functions called from mpls_dev_notify() with the mutex. As a prep, let's unify the return paths. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index e7be874668098..c5bbf712f8be0 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1616,22 +1616,24 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (event == NETDEV_REGISTER) { mdev = mpls_add_dev(dev); - if (IS_ERR(mdev)) - return notifier_from_errno(PTR_ERR(mdev)); + if (IS_ERR(mdev)) { + err = PTR_ERR(mdev); + goto err; + } - return NOTIFY_OK; + goto out; } mdev = mpls_dev_get(dev); if (!mdev) - return NOTIFY_OK; + goto out; switch (event) { case NETDEV_DOWN: err = mpls_ifdown(dev, event); if (err) - return notifier_from_errno(err); + goto err; break; case NETDEV_UP: flags = netif_get_flags(dev); @@ -1647,13 +1649,14 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, } else { err = mpls_ifdown(dev, event); if (err) - return notifier_from_errno(err); + goto err; } break; case NETDEV_UNREGISTER: err = mpls_ifdown(dev, event); if (err) - return notifier_from_errno(err); + goto err; + mdev = mpls_dev_get(dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); @@ -1667,11 +1670,16 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) - return 
notifier_from_errno(err); + goto err; } break; } + +out: return NOTIFY_OK; + +err: + return notifier_from_errno(err); } static struct notifier_block mpls_dev_notifier = { From d8f9581e1b7f1fe2e1ac985f4ea508d044c90733 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:56 +0000 Subject: [PATCH 491/867] ipv6: Add in6_dev_rcu(). rcu_dereference_rtnl() does not clearly tell whether the caller is under RCU or RTNL. Let's add in6_dev_rcu() to make it easy to remove __in6_dev_get() in the future. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/addrconf.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9e5e95988b9e5..78e8b877fb257 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -347,6 +347,11 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) return rcu_dereference_rtnl(dev->ip6_ptr); } +static inline struct inet6_dev *in6_dev_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->ip6_ptr); +} + static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev) { return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr); From bc7ebc569e8cc768342dfb01af1a26c7fbef513e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:57 +0000 Subject: [PATCH 492/867] mpls: Use in6_dev_rcu() and dev_net_rcu() in mpls_forward() and mpls_xmit(). mpls_forward() and mpls_xmit() are called under RCU. Let's use in6_dev_rcu() and dev_net_rcu() there to annotate as such. Now we pass net to mpls_stats_inc_outucastpkts() not to read dev_net_rcu() twice. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 15 ++++++++------- net/mpls/internal.h | 3 ++- net/mpls/mpls_iptunnel.c | 4 ++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c5bbf712f8be0..efc6c7da5766a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -129,7 +129,8 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -void mpls_stats_inc_outucastpkts(struct net_device *dev, +void mpls_stats_inc_outucastpkts(struct net *net, + struct net_device *dev, const struct sk_buff *skb) { struct mpls_dev *mdev; @@ -141,13 +142,13 @@ void mpls_stats_inc_outucastpkts(struct net_device *dev, tx_packets, tx_bytes); } else if (skb->protocol == htons(ETH_P_IP)) { - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { - struct inet6_dev *in6dev = __in6_dev_get(dev); + struct inet6_dev *in6dev = in6_dev_rcu(dev); if (in6dev) - IP6_UPD_PO_STATS(dev_net(dev), in6dev, + IP6_UPD_PO_STATS(net, in6dev, IPSTATS_MIB_OUT, skb->len); #endif } @@ -342,7 +343,7 @@ static bool mpls_egress(struct net *net, struct mpls_route *rt, static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct net *net = dev_net(dev); + struct net *net = dev_net_rcu(dev); struct mpls_shim_hdr *hdr; const struct mpls_nh *nh; struct mpls_route *rt; @@ -434,7 +435,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, dec.ttl -= 1; if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ - if (!mpls_egress(dev_net(out_dev), rt, skb, dec)) + if (!mpls_egress(net, rt, skb, dec)) goto err; } else { bool bos; @@ -451,7 
+452,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, } } - mpls_stats_inc_outucastpkts(out_dev, skb); + mpls_stats_inc_outucastpkts(net, out_dev, skb); /* If via wasn't specified then send out using device address */ if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC) diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 3a5feca27d6af..e491427ea08ae 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -197,7 +197,8 @@ int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); -void mpls_stats_inc_outucastpkts(struct net_device *dev, +void mpls_stats_inc_outucastpkts(struct net *net, + struct net_device *dev, const struct sk_buff *skb); #endif /* MPLS_INTERNAL_H */ diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 6e73da94af7fb..cfbab7b2fec73 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -53,7 +53,7 @@ static int mpls_xmit(struct sk_buff *skb) /* Find the output device */ out_dev = dst->dev; - net = dev_net(out_dev); + net = dev_net_rcu(out_dev); if (!mpls_output_possible(out_dev) || !dst->lwtstate || skb_warn_if_lro(skb)) @@ -128,7 +128,7 @@ static int mpls_xmit(struct sk_buff *skb) bos = false; } - mpls_stats_inc_outucastpkts(out_dev, skb); + mpls_stats_inc_outucastpkts(net, out_dev, skb); if (rt) { if (rt->rt_gw_family == AF_INET6) From ab061f3347923b6e3aa7731056dc58cbe5044c9f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:58 +0000 Subject: [PATCH 493/867] mpls: Add mpls_dev_rcu(). mpls_dev_get() uses rcu_dereference_rtnl() to fetch dev->mpls_ptr. We will replace RTNL with a dedicated mutex to protect the field. Then, we will use rcu_dereference_protected() for clarity. Let's add mpls_dev_rcu() for the RCU reader. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 12 ++++++------ net/mpls/internal.h | 5 +++++ net/mpls/mpls_iptunnel.c | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index efc6c7da5766a..10130b90c439a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -136,7 +136,7 @@ void mpls_stats_inc_outucastpkts(struct net *net, struct mpls_dev *mdev; if (skb->protocol == htons(ETH_P_MPLS_UC)) { - mdev = mpls_dev_get(dev); + mdev = mpls_dev_rcu(dev); if (mdev) MPLS_INC_STATS_LEN(mdev, skb->len, tx_packets, @@ -358,7 +358,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Careful this entire function runs inside of an rcu critical section */ - mdev = mpls_dev_get(dev); + mdev = mpls_dev_rcu(dev); if (!mdev) goto drop; @@ -467,7 +467,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, return 0; tx_err: - out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL; + out_mdev = out_dev ? 
mpls_dev_rcu(out_dev) : NULL; if (out_mdev) MPLS_INC_STATS(out_mdev, tx_errors); goto drop; @@ -1118,7 +1118,7 @@ static int mpls_fill_stats_af(struct sk_buff *skb, struct mpls_dev *mdev; struct nlattr *nla; - mdev = mpls_dev_get(dev); + mdev = mpls_dev_rcu(dev); if (!mdev) return -ENODATA; @@ -1138,7 +1138,7 @@ static size_t mpls_get_stats_af_size(const struct net_device *dev) { struct mpls_dev *mdev; - mdev = mpls_dev_get(dev); + mdev = mpls_dev_rcu(dev); if (!mdev) return 0; @@ -1341,7 +1341,7 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb, rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { - mdev = mpls_dev_get(dev); + mdev = mpls_dev_rcu(dev); if (!mdev) continue; err = mpls_netconf_fill_devconf(skb, mdev, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index e491427ea08ae..080e820100222 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -185,6 +185,11 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * return result; } +static inline struct mpls_dev *mpls_dev_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->mpls_ptr); +} + static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) { return rcu_dereference_rtnl(dev->mpls_ptr); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index cfbab7b2fec73..1a1a0eb5b787f 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -153,7 +153,7 @@ static int mpls_xmit(struct sk_buff *skb) return LWTUNNEL_XMIT_DONE; drop: - out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL; + out_mdev = out_dev ? mpls_dev_rcu(out_dev) : NULL; if (out_mdev) MPLS_INC_STATS(out_mdev, tx_errors); kfree_skb(skb); From 1fb462de9329731c17267c7ccf19619f22790a0a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:32:59 +0000 Subject: [PATCH 494/867] mpls: Pass net to mpls_dev_get(). We will replace RTNL with a per-netns mutex to protect dev->mpls_ptr. 
Then, we will use rcu_dereference_protected() with the lockdep_is_held() annotation, which requires net to access the per-netns mutex. However, dev_net(dev) is not safe without RTNL. Let's pass net to mpls_dev_get(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 11 ++++++----- net/mpls/internal.h | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 10130b90c439a..a715b12860e95 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -708,7 +708,7 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, /* Ensure this is a supported device */ err = -EINVAL; - if (!mpls_dev_get(dev)) + if (!mpls_dev_get(net, dev)) goto errout_put; if ((nh->nh_via_table == NEIGH_LINK_TABLE) && @@ -1288,7 +1288,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb, if (!dev) goto errout; - mdev = mpls_dev_get(dev); + mdev = mpls_dev_get(net, dev); if (!mdev) goto errout; @@ -1611,6 +1611,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); struct mpls_dev *mdev; unsigned int flags; int err; @@ -1625,7 +1626,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, goto out; } - mdev = mpls_dev_get(dev); + mdev = mpls_dev_get(net, dev); if (!mdev) goto out; @@ -1658,7 +1659,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (err) goto err; - mdev = mpls_dev_get(dev); + mdev = mpls_dev_get(net, dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); RCU_INIT_POINTER(dev->mpls_ptr, NULL); @@ -1666,7 +1667,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, } break; case NETDEV_CHANGENAME: - mdev = mpls_dev_get(dev); + mdev = 
mpls_dev_get(net, dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 080e820100222..0df01a5395eea 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -190,7 +190,8 @@ static inline struct mpls_dev *mpls_dev_rcu(const struct net_device *dev) return rcu_dereference(dev->mpls_ptr); } -static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) +static inline struct mpls_dev *mpls_dev_get(const struct net *net, + const struct net_device *dev) { return rcu_dereference_rtnl(dev->mpls_ptr); } From 73e40539399101667945ed8b8299d0fa67a4fca2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:33:00 +0000 Subject: [PATCH 495/867] mpls: Add mpls_route_input(). mpls_route_input_rcu() is called from mpls_forward() and mpls_getroute(). The former is under RCU, and the latter is under RTNL, so mpls_route_input_rcu() uses rcu_dereference_rtnl(). Let's use rcu_dereference() in mpls_route_input_rcu() and add an RTNL variant for mpls_getroute(). Later, we will remove rtnl_dereference() there. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a715b12860e95..530f7e6f7b3ce 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -75,16 +75,23 @@ static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, unsigned int nlm_flags); -static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) +static struct mpls_route *mpls_route_input(struct net *net, unsigned int index) { - struct mpls_route *rt = NULL; + struct mpls_route __rcu **platform_label; - if (index < net->mpls.platform_labels) { - struct mpls_route __rcu **platform_label = - rcu_dereference_rtnl(net->mpls.platform_label); - rt = rcu_dereference_rtnl(platform_label[index]); - } - return rt; + platform_label = rtnl_dereference(net->mpls.platform_label); + return rtnl_dereference(platform_label[index]); +} + +static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index) +{ + struct mpls_route __rcu **platform_label; + + if (index >= net->mpls.platform_labels) + return NULL; + + platform_label = rcu_dereference(net->mpls.platform_label); + return rcu_dereference(platform_label[index]); } bool mpls_output_possible(const struct net_device *dev) @@ -2373,12 +2380,12 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, u32 portid = NETLINK_CB(in_skb).portid; u32 in_label = LABEL_NOT_SPECIFIED; struct nlattr *tb[RTA_MAX + 1]; + struct mpls_route *rt = NULL; u32 labels[MAX_NEW_LABELS]; struct mpls_shim_hdr *hdr; unsigned int hdr_size = 0; const struct mpls_nh *nh; struct net_device *dev; - struct mpls_route *rt; struct rtmsg *rtm, *r; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2406,7 +2413,8 @@ static 
int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, } } - rt = mpls_route_input_rcu(net, in_label); + if (in_label < net->mpls.platform_labels) + rt = mpls_route_input(net, in_label); if (!rt) { err = -ENETUNREACH; goto errout; From 3a49629335a523341d3fae895435d5217341a022 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:33:01 +0000 Subject: [PATCH 496/867] mpls: Use mpls_route_input() where appropriate. In many places, we use rtnl_dereference() twice for net->mpls.platform_label and net->mpls.platform_label[index]. Let's replace the code with mpls_route_input(). We do not use mpls_route_input() in mpls_dump_routes() since we will rely on RCU there. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 530f7e6f7b3ce..35ae3dbd7bdc7 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -590,19 +590,17 @@ static void mpls_route_update(struct net *net, unsigned index, mpls_rt_free(rt); } -static unsigned find_free_label(struct net *net) +static unsigned int find_free_label(struct net *net) { - struct mpls_route __rcu **platform_label; - size_t platform_labels; - unsigned index; + unsigned int index; - platform_label = rtnl_dereference(net->mpls.platform_label); - platform_labels = net->mpls.platform_labels; - for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels; + for (index = MPLS_LABEL_FIRST_UNRESERVED; + index < net->mpls.platform_labels; index++) { - if (!rtnl_dereference(platform_label[index])) + if (!mpls_route_input(net, index)) return index; } + return LABEL_NOT_SPECIFIED; } @@ -985,7 +983,6 @@ static bool mpls_label_ok(struct net *net, unsigned int *index, static int mpls_route_add(struct mpls_route_config *cfg, struct 
netlink_ext_ack *extack) { - struct mpls_route __rcu **platform_label; struct net *net = cfg->rc_nlinfo.nl_net; struct mpls_route *rt, *old; int err = -EINVAL; @@ -1013,8 +1010,7 @@ static int mpls_route_add(struct mpls_route_config *cfg, } err = -EEXIST; - platform_label = rtnl_dereference(net->mpls.platform_label); - old = rtnl_dereference(platform_label[index]); + old = mpls_route_input(net, index); if ((cfg->rc_nlflags & NLM_F_EXCL) && old) goto errout; @@ -1503,16 +1499,15 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) static int mpls_ifdown(struct net_device *dev, int event) { - struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - unsigned index; + unsigned int index; - platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { - struct mpls_route *rt = rtnl_dereference(platform_label[index]); + struct mpls_route *rt; bool nh_del = false; u8 alive = 0; + rt = mpls_route_input(net, index); if (!rt) continue; @@ -1583,15 +1578,14 @@ static int mpls_ifdown(struct net_device *dev, int event) static void mpls_ifup(struct net_device *dev, unsigned int flags) { - struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - unsigned index; + unsigned int index; u8 alive; - platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { - struct mpls_route *rt = rtnl_dereference(platform_label[index]); + struct mpls_route *rt; + rt = mpls_route_input(net, index); if (!rt) continue; From dde1b38e873cff70f0af36e884bbeb1b14a536ed Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:33:02 +0000 Subject: [PATCH 497/867] mpls: Convert mpls_dump_routes() to RCU. mpls_dump_routes() sets fib_dump_filter.rtnl_held to true and calls __dev_get_by_index() in mpls_valid_fib_dump_req(). This is the only RTNL dependant in mpls_dump_routes(). 
Also, synchronize_rcu() in resize_platform_label_table() guarantees that net->mpls.platform_label is alive under RCU. Let's convert mpls_dump_routes() to RCU and use dev_get_by_index_rcu(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-11-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 35ae3dbd7bdc7..f00f75c137dc4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2153,7 +2153,7 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, if (i == RTA_OIF) { ifindex = nla_get_u32(tb[i]); - filter->dev = __dev_get_by_index(net, ifindex); + filter->dev = dev_get_by_index_rcu(net, ifindex); if (!filter->dev) return -ENODEV; filter->filter_set = 1; @@ -2191,20 +2191,19 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); struct mpls_route __rcu **platform_label; struct fib_dump_filter filter = { - .rtnl_held = true, + .rtnl_held = false, }; unsigned int flags = NLM_F_MULTI; size_t platform_labels; unsigned int index; + int err; - ASSERT_RTNL(); + rcu_read_lock(); if (cb->strict_check) { - int err; - err = mpls_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) - return err; + goto err; /* for MPLS, there is only 1 table with fixed type and flags. * If either are set in the filter then return nothing. 
@@ -2212,14 +2211,14 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) || (filter.rt_type && filter.rt_type != RTN_UNICAST) || filter.flags) - return skb->len; + goto unlock; } index = cb->args[0]; if (index < MPLS_LABEL_FIRST_UNRESERVED) index = MPLS_LABEL_FIRST_UNRESERVED; - platform_label = rtnl_dereference(net->mpls.platform_label); + platform_label = rcu_dereference(net->mpls.platform_label); platform_labels = net->mpls.platform_labels; if (filter.filter_set) @@ -2228,7 +2227,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) for (; index < platform_labels; index++) { struct mpls_route *rt; - rt = rtnl_dereference(platform_label[index]); + rt = rcu_dereference(platform_label[index]); if (!rt) continue; @@ -2243,7 +2242,13 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) } cb->args[0] = index; +unlock: + rcu_read_unlock(); return skb->len; + +err: + rcu_read_unlock(); + return err; } static inline size_t lfib_nlmsg_size(struct mpls_route *rt) @@ -2767,7 +2772,8 @@ static struct rtnl_af_ops mpls_af_ops __read_mostly = { static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = { {THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0}, {THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0}, - {THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, 0}, + {THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, + RTNL_FLAG_DUMP_UNLOCKED}, {THIS_MODULE, PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, mpls_netconf_dump_devconf, RTNL_FLAG_DUMP_UNLOCKED}, From fb2b77b9b1dbd90cf7db5580b15df40a20b42bd8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:33:03 +0000 Subject: [PATCH 498/867] mpls: Convert RTM_GETNETCONF to RCU. mpls_netconf_get_devconf() calls __dev_get_by_index(), and this only depends on RTNL. 
Let's convert mpls_netconf_get_devconf() to RCU and use dev_get_by_index_rcu(). Note that nlmsg_new() is moved ahead to use GFP_KERNEL. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-12-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index f00f75c137dc4..49fd15232dbec 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1282,23 +1282,32 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = -EINVAL; - if (!tb[NETCONFA_IFINDEX]) + if (!tb[NETCONFA_IFINDEX]) { + err = -EINVAL; goto errout; + } ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); - dev = __dev_get_by_index(net, ifindex); - if (!dev) - goto errout; - - mdev = mpls_dev_get(net, dev); - if (!mdev) - goto errout; - err = -ENOBUFS; skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); - if (!skb) + if (!skb) { + err = -ENOBUFS; goto errout; + } + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + err = -EINVAL; + goto errout_unlock; + } + + mdev = mpls_dev_rcu(dev); + if (!mdev) { + err = -EINVAL; + goto errout_unlock; + } err = mpls_netconf_fill_devconf(skb, mdev, NETLINK_CB(in_skb).portid, @@ -1307,12 +1316,19 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) { /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; + goto errout_unlock; } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + + rcu_read_unlock(); errout: return err; + +errout_unlock: + rcu_read_unlock(); + kfree_skb(skb); + goto errout; } static int mpls_netconf_dump_devconf(struct sk_buff *skb, @@ -2776,7 +2792,7 @@ static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_modu 
RTNL_FLAG_DUMP_UNLOCKED}, {THIS_MODULE, PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, mpls_netconf_dump_devconf, - RTNL_FLAG_DUMP_UNLOCKED}, + RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, }; static int __init mpls_init(void) From e833eb25161aae6cd0caf14782f405d0ed5765ed Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:33:04 +0000 Subject: [PATCH 499/867] mpls: Protect net->mpls.platform_label with a per-netns mutex. MPLS (re)uses RTNL to protect net->mpls.platform_label, but the lock does not need to be RTNL at all. Let's protect net->mpls.platform_label with a dedicated per-netns mutex. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-13-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/mpls.h | 1 + net/mpls/af_mpls.c | 55 ++++++++++++++++++++++++++-------------- net/mpls/internal.h | 7 ++++- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 19ad2574b267c..6682e51513efa 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -16,6 +16,7 @@ struct netns_mpls { int default_ttl; size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; + struct mutex platform_mutex; struct ctl_table_header *ctl; }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 49fd15232dbec..d0d047dd2245f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -79,8 +79,8 @@ static struct mpls_route *mpls_route_input(struct net *net, unsigned int index) { struct mpls_route __rcu **platform_label; - platform_label = rtnl_dereference(net->mpls.platform_label); - return rtnl_dereference(platform_label[index]); + platform_label = mpls_dereference(net, net->mpls.platform_label); + return mpls_dereference(net, platform_label[index]); } static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index) @@ -578,10 +578,8 @@ static void 
mpls_route_update(struct net *net, unsigned index, struct mpls_route __rcu **platform_label; struct mpls_route *rt; - ASSERT_RTNL(); - - platform_label = rtnl_dereference(net->mpls.platform_label); - rt = rtnl_dereference(platform_label[index]); + platform_label = mpls_dereference(net, net->mpls.platform_label); + rt = mpls_dereference(net, platform_label[index]); rcu_assign_pointer(platform_label[index], new); mpls_notify_route(net, index, rt, new, info); @@ -1472,8 +1470,6 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) int err = -ENOMEM; int i; - ASSERT_RTNL(); - mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return ERR_PTR(err); @@ -1633,6 +1629,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, unsigned int flags; int err; + mutex_lock(&net->mpls.platform_mutex); + if (event == NETDEV_REGISTER) { mdev = mpls_add_dev(dev); if (IS_ERR(mdev)) { @@ -1695,9 +1693,11 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, } out: + mutex_unlock(&net->mpls.platform_mutex); return NOTIFY_OK; err: + mutex_unlock(&net->mpls.platform_mutex); return notifier_from_errno(err); } @@ -1973,6 +1973,7 @@ static int rtm_to_route_config(struct sk_buff *skb, static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct net *net = sock_net(skb->sk); struct mpls_route_config *cfg; int err; @@ -1984,7 +1985,9 @@ static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto out; + mutex_lock(&net->mpls.platform_mutex); err = mpls_route_del(cfg, extack); + mutex_unlock(&net->mpls.platform_mutex); out: kfree(cfg); @@ -1995,6 +1998,7 @@ static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct net *net = sock_net(skb->sk); struct mpls_route_config *cfg; int err; @@ -2006,7 +2010,9 @@ static int 
mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto out; + mutex_lock(&net->mpls.platform_mutex); err = mpls_route_add(cfg, extack); + mutex_unlock(&net->mpls.platform_mutex); out: kfree(cfg); @@ -2407,6 +2413,8 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, u8 n_labels; int err; + mutex_lock(&net->mpls.platform_mutex); + err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack); if (err < 0) goto errout; @@ -2450,7 +2458,8 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, goto errout_free; } - return rtnl_unicast(skb, net, portid); + err = rtnl_unicast(skb, net, portid); + goto errout; } if (tb[RTA_NEWDST]) { @@ -2542,12 +2551,14 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, err = rtnl_unicast(skb, net, portid); errout: + mutex_unlock(&net->mpls.platform_mutex); return err; nla_put_failure: nlmsg_cancel(skb, nlh); err = -EMSGSIZE; errout_free: + mutex_unlock(&net->mpls.platform_mutex); kfree_skb(skb); return err; } @@ -2603,9 +2614,10 @@ static int resize_platform_label_table(struct net *net, size_t limit) lo->addr_len); } - rtnl_lock(); + mutex_lock(&net->mpls.platform_mutex); + /* Remember the original table */ - old = rtnl_dereference(net->mpls.platform_label); + old = mpls_dereference(net, net->mpls.platform_label); old_limit = net->mpls.platform_labels; /* Free any labels beyond the new table */ @@ -2636,7 +2648,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) net->mpls.platform_labels = limit; rcu_assign_pointer(net->mpls.platform_label, labels); - rtnl_unlock(); + mutex_unlock(&net->mpls.platform_mutex); mpls_rt_free(rt2); mpls_rt_free(rt0); @@ -2709,12 +2721,13 @@ static const struct ctl_table mpls_table[] = { }, }; -static int mpls_net_init(struct net *net) +static __net_init int mpls_net_init(struct net *net) { size_t table_size = ARRAY_SIZE(mpls_table); struct ctl_table *table; int i; + 
mutex_init(&net->mpls.platform_mutex); net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; net->mpls.ip_ttl_propagate = 1; @@ -2740,7 +2753,7 @@ static int mpls_net_init(struct net *net) return 0; } -static void mpls_net_exit(struct net *net) +static __net_exit void mpls_net_exit(struct net *net) { struct mpls_route __rcu **platform_label; size_t platform_labels; @@ -2760,16 +2773,20 @@ static void mpls_net_exit(struct net *net) * As such no additional rcu synchronization is necessary when * freeing the platform_label table. */ - rtnl_lock(); - platform_label = rtnl_dereference(net->mpls.platform_label); + mutex_lock(&net->mpls.platform_mutex); + + platform_label = mpls_dereference(net, net->mpls.platform_label); platform_labels = net->mpls.platform_labels; + for (index = 0; index < platform_labels; index++) { - struct mpls_route *rt = rtnl_dereference(platform_label[index]); - RCU_INIT_POINTER(platform_label[index], NULL); + struct mpls_route *rt; + + rt = mpls_dereference(net, platform_label[index]); mpls_notify_route(net, index, rt, NULL, NULL); mpls_rt_free(rt); } - rtnl_unlock(); + + mutex_unlock(&net->mpls.platform_mutex); kvfree(platform_label); } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 0df01a5395eea..80cb5bbcd9465 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -185,6 +185,11 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * return result; } +#define mpls_dereference(net, p) \ + rcu_dereference_protected( \ + (p), \ + lockdep_is_held(&(net)->mpls.platform_mutex)) + static inline struct mpls_dev *mpls_dev_rcu(const struct net_device *dev) { return rcu_dereference(dev->mpls_ptr); @@ -193,7 +198,7 @@ static inline struct mpls_dev *mpls_dev_rcu(const struct net_device *dev) static inline struct mpls_dev *mpls_dev_get(const struct net *net, const struct net_device *dev) { - return rcu_dereference_rtnl(dev->mpls_ptr); + return mpls_dereference(net, dev->mpls_ptr); } int 
nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, From 7d99a7c6c6a3d0d6456520baa85d58095bf262ee Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 Oct 2025 17:33:05 +0000 Subject: [PATCH 500/867] mpls: Drop RTNL for RTM_NEWROUTE, RTM_DELROUTE, and RTM_GETROUTE. RTM_NEWROUTE looks up dev under RCU (ip_route_output(), ipv6_stub->ipv6_dst_lookup_flow(), netdev_get_by_index()), and each neighbour holds the refcnt of its dev. Also, net->mpls.platform_label is protected by a dedicated per-netns mutex. Now, no MPLS code depends on RTNL. Let's drop RTNL for RTM_NEWROUTE, RTM_DELROUTE, and RTM_GETROUTE. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20251029173344.2934622-14-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d0d047dd2245f..580aac112dd21 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2803,10 +2803,12 @@ static struct rtnl_af_ops mpls_af_ops __read_mostly = { }; static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = { - {THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0}, - {THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0}, + {THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED}, + {THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED}, {THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, - RTNL_FLAG_DUMP_UNLOCKED}, + RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, {THIS_MODULE, PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, mpls_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, From c18d4b190a46651726c9a952667c74d2deb33c28 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Tue, 28 Oct 2025 20:30:05 +0000 Subject: [PATCH 501/867] net: Extend NAPI threaded polling 
to allow kthread based busy polling Add a new state NAPI_STATE_THREADED_BUSY_POLL to the NAPI state enum to enable and disable threaded busy polling. When threaded busy polling is enabled for a NAPI, enable NAPI_STATE_THREADED also. When the threaded NAPI is scheduled, set NAPI_STATE_IN_BUSY_POLL to signal napi_complete_done not to rearm interrupts. Whenever NAPI_STATE_THREADED_BUSY_POLL is unset, the NAPI_STATE_IN_BUSY_POLL will be unset, napi_complete_done unsets the NAPI_STATE_SCHED_THREADED bit also, which in turn will make the kthread go to sleep. Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Acked-by: Martin Karsten Tested-by: Martin Karsten Link: https://patch.msgid.link/20251028203007.575686-2-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 5 ++- Documentation/networking/napi.rst | 50 ++++++++++++++++++++- include/linux/netdevice.h | 4 +- include/uapi/linux/netdev.h | 1 + net/core/dev.c | 58 ++++++++++++++++++++----- net/core/dev.h | 3 ++ net/core/netdev-genl-gen.c | 2 +- tools/include/uapi/linux/netdev.h | 1 + 8 files changed, 109 insertions(+), 15 deletions(-) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index e00d3fa1c152d..10c412b7433f7 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -88,7 +88,7 @@ definitions: - name: napi-threaded type: enum - entries: [disabled, enabled] + entries: [disabled, enabled, busy-poll] attribute-sets: - @@ -291,7 +291,8 @@ attribute-sets: name: threaded doc: Whether the NAPI is configured to operate in threaded polling mode. If this is set to enabled then the NAPI context operates - in threaded polling mode. + in threaded polling mode. If this is set to busy-poll, then the + threaded polling mode also busy polls. 
type: u32 enum: napi-threaded - diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index 7dd60366f4ff3..4e008efebb352 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -263,7 +263,9 @@ are not well known). Busy polling is enabled by either setting ``SO_BUSY_POLL`` on selected sockets or using the global ``net.core.busy_poll`` and ``net.core.busy_read`` sysctls. An io_uring API for NAPI busy polling -also exists. +also exists. Threaded polling of NAPI also has a mode to busy poll for +packets (:ref:`threaded busy polling <threaded_busy_poll>`) using the NAPI +processing kthread. epoll-based busy polling ------------------------ @@ -426,6 +428,52 @@ Therefore, setting ``gro_flush_timeout`` and ``napi_defer_hard_irqs`` is the recommended usage, because otherwise setting ``irq-suspend-timeout`` might not have any discernible effect. +.. _threaded_busy_poll: + +Threaded NAPI busy polling +-------------------------- + +Threaded NAPI busy polling extends threaded NAPI and adds support to do +continuous busy polling of the NAPI. This can be useful for forwarding or +AF_XDP applications. + +Threaded NAPI busy polling can be enabled on per NIC queue basis using Netlink. + +For example, using the following script: + +.. code-block:: bash + + $ ynl --family netdev --do napi-set \ + --json='{"id": 66, "threaded": "busy-poll"}' + +The kernel will create a kthread that busy polls on this NAPI. + +The user may elect to set the CPU affinity of this kthread to an unused CPU +core to improve how often the NAPI is polled at the expense of wasted CPU +cycles. Note that this will keep the CPU core busy with 100% usage. + +Once threaded busy polling is enabled for a NAPI, PID of the kthread can be +retrieved using Netlink so the affinity of the kthread can be set up. + +For example, the following script can be used to fetch the PID: + +..
code-block:: bash + + $ ynl --family netdev --do napi-get --json='{"id": 66}' + +This will output something like following, the pid `258` is the PID of the +kthread that is polling this NAPI. + +.. code-block:: bash + + $ {'defer-hard-irqs': 0, + 'gro-flush-timeout': 0, + 'id': 66, + 'ifindex': 2, + 'irq-suspend-timeout': 0, + 'pid': 258, + 'threaded': 'busy-poll'} + .. _threaded: Threaded NAPI diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c1e5042c5e76..e808071dbb7d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -423,11 +423,12 @@ enum { NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_LISTED, /* NAPI added to system lists */ NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ - NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ + NAPI_STATE_IN_BUSY_POLL, /* Do not rearm NAPI interrupt */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */ + NAPI_STATE_THREADED_BUSY_POLL, /* The threaded NAPI poller will busy poll */ }; enum { @@ -442,6 +443,7 @@ enum { NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER), + NAPIF_STATE_THREADED_BUSY_POLL = BIT(NAPI_STATE_THREADED_BUSY_POLL), }; enum gro_result { diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 48eb49aa03d41..048c8de1a130d 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -80,6 +80,7 @@ enum netdev_qstats_scope { enum netdev_napi_threaded { NETDEV_NAPI_THREADED_DISABLED, NETDEV_NAPI_THREADED_ENABLED, + NETDEV_NAPI_THREADED_BUSY_POLL, }; enum { diff --git a/net/core/dev.c b/net/core/dev.c index 
dccc1176f3c65..2c1de5fb97d93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7089,7 +7089,8 @@ static void napi_stop_kthread(struct napi_struct *napi) */ if ((val & NAPIF_STATE_SCHED_THREADED) || !(val & NAPIF_STATE_SCHED)) { - new = val & (~NAPIF_STATE_THREADED); + new = val & (~(NAPIF_STATE_THREADED | + NAPIF_STATE_THREADED_BUSY_POLL)); } else { msleep(20); continue; @@ -7113,6 +7114,16 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } +static void napi_set_threaded_state(struct napi_struct *napi, + enum netdev_napi_threaded threaded_mode) +{ + bool threaded = threaded_mode != NETDEV_NAPI_THREADED_DISABLED; + bool busy_poll = threaded_mode == NETDEV_NAPI_THREADED_BUSY_POLL; + + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + assign_bit(NAPI_STATE_THREADED_BUSY_POLL, &napi->state, busy_poll); +} + int napi_set_threaded(struct napi_struct *napi, enum netdev_napi_threaded threaded) { @@ -7139,7 +7150,7 @@ int napi_set_threaded(struct napi_struct *napi, } else { /* Make sure kthread is created before THREADED bit is set. 
*/ smp_mb__before_atomic(); - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + napi_set_threaded_state(napi, threaded); } return 0; @@ -7531,7 +7542,9 @@ void napi_disable_locked(struct napi_struct *n) } new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; - new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); + new &= ~(NAPIF_STATE_THREADED | + NAPIF_STATE_THREADED_BUSY_POLL | + NAPIF_STATE_PREFER_BUSY_POLL); } while (!try_cmpxchg(&n->state, &val, new)); hrtimer_cancel(&n->timer); @@ -7743,7 +7756,7 @@ static int napi_thread_wait(struct napi_struct *napi) return -1; } -static void napi_threaded_poll_loop(struct napi_struct *napi) +static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct softnet_data *sd; @@ -7772,22 +7785,47 @@ static void napi_threaded_poll_loop(struct napi_struct *napi) } skb_defer_free_flush(); bpf_net_ctx_clear(bpf_net_ctx); + + /* When busy poll is enabled, the old packets are not flushed in + * napi_complete_done. So flush them here. + */ + if (busy_poll) + gro_flush_normal(&napi->gro, HZ >= 1000); local_bh_enable(); + /* Call cond_resched here to avoid watchdog warnings. 
*/ + if (repoll || busy_poll) { + rcu_softirq_qs_periodic(last_qs); + cond_resched(); + } + if (!repoll) break; - - rcu_softirq_qs_periodic(last_qs); - cond_resched(); } } static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; + bool want_busy_poll; + bool in_busy_poll; + unsigned long val; + + while (!napi_thread_wait(napi)) { + val = READ_ONCE(napi->state); + + want_busy_poll = val & NAPIF_STATE_THREADED_BUSY_POLL; + in_busy_poll = val & NAPIF_STATE_IN_BUSY_POLL; - while (!napi_thread_wait(napi)) - napi_threaded_poll_loop(napi); + if (unlikely(val & NAPIF_STATE_DISABLE)) + want_busy_poll = false; + + if (want_busy_poll != in_busy_poll) + assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state, + want_busy_poll); + + napi_threaded_poll_loop(napi, want_busy_poll); + } return 0; } @@ -13097,7 +13135,7 @@ static void run_backlog_napi(unsigned int cpu) { struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); - napi_threaded_poll_loop(&sd->backlog); + napi_threaded_poll_loop(&sd->backlog, false); } static void backlog_napi_setup(unsigned int cpu) diff --git a/net/core/dev.h b/net/core/dev.h index 900880e8b5b4b..4d872a79bafbc 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -317,6 +317,9 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) { + if (test_bit(NAPI_STATE_THREADED_BUSY_POLL, &n->state)) + return NETDEV_NAPI_THREADED_BUSY_POLL; + if (test_bit(NAPI_STATE_THREADED, &n->state)) return NETDEV_NAPI_THREADED_ENABLED; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index e9a2a6f26cb7d..ff20435c45d25 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -97,7 +97,7 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, 
}, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, - [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 2), }; /* NETDEV_CMD_BIND_TX - do */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 48eb49aa03d41..048c8de1a130d 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -80,6 +80,7 @@ enum netdev_qstats_scope { enum netdev_napi_threaded { NETDEV_NAPI_THREADED_DISABLED, NETDEV_NAPI_THREADED_ENABLED, + NETDEV_NAPI_THREADED_BUSY_POLL, }; enum { From add3c1324a8912b1abbf7afeb976fb719563f454 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Tue, 28 Oct 2025 20:30:06 +0000 Subject: [PATCH 502/867] selftests: Add napi threaded busy poll test in `busy_poller` Add testcase to run busy poll test with threaded napi busy poll enabled. Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Acked-by: Martin Karsten Tested-by: Martin Karsten Link: https://patch.msgid.link/20251028203007.575686-3-skhawaja@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/busy_poll_test.sh | 24 ++++++++++++++++++- tools/testing/selftests/net/busy_poller.c | 16 ++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh index 7d2d40812074f..5ec1c85c1623e 100755 --- a/tools/testing/selftests/net/busy_poll_test.sh +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100 GRO_FLUSH_TIMEOUT=50000 SUSPEND_TIMEOUT=20000000 +NAPI_THREADED_MODE_BUSY_POLL=2 + setup_ns() { set -e @@ -62,6 +64,9 @@ cleanup_ns() test_busypoll() { suspend_value=${1:-0} + napi_threaded_value=${2:-0} + prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL} + tmp_file=$(mktemp) out_file=$(mktemp) @@ -73,10 +78,11 @@ test_busypoll() -b${SERVER_IP} \ -m${MAX_EVENTS} \ -u${BUSY_POLL_USECS} \ - -P${PREFER_BUSY_POLL} \ 
+ -P${prefer_busy_poll_value} \ -g${BUSY_POLL_BUDGET} \ -i${NSIM_SV_IFIDX} \ -s${suspend_value} \ + -t${napi_threaded_value} \ -o${out_file}& wait_local_port_listen nssv ${SERVER_PORT} tcp @@ -109,6 +115,15 @@ test_busypoll_with_suspend() return $? } +test_busypoll_with_napi_threaded() +{ + # Only enable napi threaded poll. Set suspend timeout and prefer busy + # poll to 0. + test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0 + + return $? +} + ### ### Code start ### @@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then exit 1 fi +test_busypoll_with_napi_threaded +if [ $? -ne 0 ]; then + echo "test_busypoll_with_napi_threaded failed" + cleanup_ns + exit 1 +fi + echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 04c7ff577bb89..3a81f9c947950 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -65,15 +65,16 @@ static uint32_t cfg_busy_poll_usecs; static uint16_t cfg_busy_poll_budget; static uint8_t cfg_prefer_busy_poll; -/* IRQ params */ +/* NAPI params */ static uint32_t cfg_defer_hard_irqs; static uint64_t cfg_gro_flush_timeout; static uint64_t cfg_irq_suspend_timeout; +static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED; static void usage(const char *filepath) { error(1, 0, - "Usage: %s -p -b -m -u -P -g -o -d -r -s -i", + "Usage: %s -p -b -m -u -P -g -o -d -r -s -t -i", filepath); } @@ -86,7 +87,7 @@ static void parse_opts(int argc, char **argv) if (argc <= 1) usage(argv[0]); - while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) { /* most options take integer values, except o and b, so reduce * code duplication a bit for the common case by calling * strtoull here and leave bounds checking and casting per @@ -168,6 +169,12 @@ static void parse_opts(int argc, 
char **argv) cfg_ifindex = (int)tmp; break; + case 't': + if (tmp > 2) + error(1, ERANGE, "napi threaded poll value must be 0-2"); + + cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp; + break; } } @@ -247,6 +254,9 @@ static void setup_queue(void) netdev_napi_set_req_set_irq_suspend_timeout(set_req, cfg_irq_suspend_timeout); + if (cfg_napi_threaded_poll) + netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll); + if (netdev_napi_set(ys, set_req)) error(1, 0, "can't set NAPI params: %s\n", yerr.msg); From abcf6eef90c6e47efed62a7c233ffc1a6a90797e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 27 Oct 2025 13:27:58 +0100 Subject: [PATCH 503/867] net: phy: introduce internal API for PHY MSE diagnostics Add the base infrastructure for Mean Square Error (MSE) diagnostics, as proposed by the OPEN Alliance "Advanced diagnostic features for 100BASE-T1 automotive Ethernet PHYs" [1] specification. The OPEN Alliance spec defines only average MSE and average peak MSE over a fixed number of symbols. However, other PHYs, such as the KSZ9131, additionally expose a worst-peak MSE value latched since the last channel capture. This API accounts for such vendor extensions by adding a distinct capability bit and snapshot field. Channel-to-pair mapping is normally straightforward, but in some cases (e.g. 100BASE-TX with MDI-X resolution unknown) the mapping is ambiguous. If hardware does not expose MDI-X status, the exact pair cannot be determined. To avoid returning misleading per-channel data in this case, a LINK selector is defined for aggregate MSE measurements. All investigated devices differ in MSE capabilities, such as sample rate, number of analyzed symbols, and scaling factors. For example, the KSZ9131 uses different scaling for MSE and pMSE. To make this visible to callers, scale limits and timing information are returned via get_mse_capability(). Some PHYs sample very few symbols at high frequency (e.g. 2 us update rate). 
To cover such cases and allow for future high-speed PHYs with even shorter intervals, the refresh rate is reported as u64 in picoseconds. This patch introduces the internal PHY API for Mean Square Error diagnostics. It defines new kernel-side data types and driver hooks: - struct phy_mse_capability: describes supported metrics, scale limits, update interval, and sampling length. - struct phy_mse_snapshot: holds one correlated measurement set. - New phy_driver ops: `get_mse_capability()` and `get_mse_snapshot()`. These definitions form the core kernel API. No user-visible interfaces are added in this commit. Standardization notes: OPEN Alliance defines presence and interpretation of some metrics but does not fix numeric scales or sampling internals: - SQI (3-bit, 0..7) is mandatory; correlation to SNR/BER is informative (OA 100BASE-T1 TC1 v1.0 6.1.2; OA 1000BASE-T1 TC12 v2.2 6.1.2). - MSE is optional; OA recommends 2^16 symbols and scaling to 0..511, with a worst-case latch since last read (OA 100BASE-T1 TC1 v1.0 6.1.1; OA 1000BASE-T1 TC12 v2.2 6.1.1). Refresh is recommended (~0.8-2.0 ms for 100BASE-T1; ~80-200 us for 1000BASE-T1). Exact scaling/time windows are vendor-specific. - Peak MSE (pMSE) is defined only for 100BASE-T1 as optional, e.g. 128-symbol sliding window with 8-bit range and worst-case latch (OA 100BASE-T1 TC1 v1.0 6.1.3). Therefore this API exposes which measures and selectors a PHY supports, and documents where behavior is standard-referenced vs vendor-specific. 
[1] Signed-off-by: Oleksij Rempel Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251027122801.982364-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 206 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index 358dd6f0ff965..e3474f03cbc1e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -903,6 +903,165 @@ struct phy_led { #define to_phy_led(d) container_of(d, struct phy_led, led_cdev) +/* + * PHY_MSE_CAP_* - Bitmask flags for Mean Square Error (MSE) capabilities + * + * These flags describe which MSE metrics and selectors are implemented + * by the PHY for the current link mode. They are used in + * struct phy_mse_capability.supported_caps. + * + * Standardization: + * The OPEN Alliance (OA) defines the presence of MSE/SQI/pMSE but not their + * numeric scaling, update intervals, or aggregation windows. See: + * OA 100BASE-T1 TC1 v1.0, sections 6.1.1-6.1.3 + * OA 1000BASE-T1 TC12 v2.2, sections 6.1.1-6.1.2 + * + * Description of flags: + * + * PHY_MSE_CAP_CHANNEL_A + * Per-pair diagnostics for Channel A are supported. Mapping to the + * physical wire pair may depend on MDI/MDI-X polarity. + * + * PHY_MSE_CAP_CHANNEL_B, _C, _D + * Same as above for channels B-D. + * + * PHY_MSE_CAP_WORST_CHANNEL + * The PHY or driver can identify and report the single worst-performing + * channel without querying each one individually. + * + * PHY_MSE_CAP_LINK + * The PHY provides only a link-wide aggregate measurement or cannot map + * results to a specific pair (for example 100BASE-TX with unknown + * MDI/MDI-X). + * + * PHY_MSE_CAP_AVG + * Average MSE (mean DCQ metric) is supported. For 100/1000BASE-T1 the OA + * recommends 2^16 symbols, scaled 0..511, but the exact scaling is + * vendor-specific. + * + * PHY_MSE_CAP_PEAK + * Peak MSE (current peak within the measurement window) is supported. 
+ * Defined as pMSE for 100BASE-T1; vendor-specific for others. + * + * PHY_MSE_CAP_WORST_PEAK + * Latched worst-case peak MSE since the last read (read-to-clear if + * implemented). Optional in OA 100BASE-T1 TC1 6.1.3. + */ +#define PHY_MSE_CAP_CHANNEL_A BIT(0) +#define PHY_MSE_CAP_CHANNEL_B BIT(1) +#define PHY_MSE_CAP_CHANNEL_C BIT(2) +#define PHY_MSE_CAP_CHANNEL_D BIT(3) +#define PHY_MSE_CAP_WORST_CHANNEL BIT(4) +#define PHY_MSE_CAP_LINK BIT(5) +#define PHY_MSE_CAP_AVG BIT(6) +#define PHY_MSE_CAP_PEAK BIT(7) +#define PHY_MSE_CAP_WORST_PEAK BIT(8) + +/* + * enum phy_mse_channel - Identifiers for selecting MSE measurement channels + * + * PHY_MSE_CHANNEL_A - PHY_MSE_CHANNEL_D + * Select per-pair measurement for the corresponding channel. + * + * PHY_MSE_CHANNEL_WORST + * Select the single worst-performing channel reported by hardware. + * + * PHY_MSE_CHANNEL_LINK + * Select link-wide aggregate data (used when per-pair results are + * unavailable). + */ +enum phy_mse_channel { + PHY_MSE_CHANNEL_A, + PHY_MSE_CHANNEL_B, + PHY_MSE_CHANNEL_C, + PHY_MSE_CHANNEL_D, + PHY_MSE_CHANNEL_WORST, + PHY_MSE_CHANNEL_LINK, +}; + +/** + * struct phy_mse_capability - Capabilities of Mean Square Error (MSE) + * measurement interface + * + * Standardization notes: + * + * - Presence of MSE/SQI/pMSE is defined by OPEN Alliance specs, but numeric + * scaling, refresh/update rate and aggregation windows are not fixed and + * are vendor-/product-specific. (OA 100BASE-T1 TC1 v1.0 6.1.*; + * OA 1000BASE-T1 TC12 v2.2 6.1.*) + * + * - Typical recommendations: 2^16 symbols and 0..511 scaling for MSE; pMSE only + * defined for 100BASE-T1 (sliding window example), others are vendor + * extensions. Drivers must report actual scale/limits here. + * + * Describes the MSE measurement capabilities for the current link mode. These + * properties are dynamic and may change when link settings are modified. 
+ * Callers should re-query this capability after any link state change to + * ensure they have the most up-to-date information. + * + * Callers should only request measurements for channels and types that are + * indicated as supported by the @supported_caps bitmask. If @supported_caps + * is 0, the device provides no MSE diagnostics, and driver operations should + * typically return -EOPNOTSUPP. + * + * Snapshot values for average and peak MSE can be normalized to a 0..1 ratio + * by dividing the raw snapshot by the corresponding @max_average_mse or + * @max_peak_mse value. + * + * @max_average_mse: The maximum value for an average MSE snapshot. This + * defines the scale for the measurement. If the PHY_MSE_CAP_AVG capability is + * supported, this value MUST be greater than 0. (vendor-specific units). + * @max_peak_mse: The maximum value for a peak MSE snapshot. If either + * PHY_MSE_CAP_PEAK or PHY_MSE_CAP_WORST_PEAK is supported, this value MUST + * be greater than 0. (vendor-specific units). + * @refresh_rate_ps: The typical interval, in picoseconds, between hardware + * updates of the MSE values. This is an estimate, and callers should not + * assume synchronous sampling. (vendor-specific units). + * @num_symbols: The number of symbols aggregated per hardware sample to + * calculate the MSE. (vendor-specific units). + * @supported_caps: A bitmask of PHY_MSE_CAP_* values indicating which + * measurement types (e.g., average, peak) and channels + * (e.g., per-pair or link-wide) are supported. + */ +struct phy_mse_capability { + u64 max_average_mse; + u64 max_peak_mse; + u64 refresh_rate_ps; + u64 num_symbols; + u32 supported_caps; +}; + +/** + * struct phy_mse_snapshot - A snapshot of Mean Square Error (MSE) diagnostics + * + * Holds a set of MSE diagnostic values that were all captured from a single + * measurement window. + * + * Values are raw, device-scaled and not normalized. Use struct + * phy_mse_capability to interpret the scale and sampling window. 
+ * + * @average_mse: The average MSE value over the measurement window. + * OPEN Alliance references MSE as a DCQ metric; recommends 2^16 symbols and + * 0..511 scaling. Exact scale and refresh are vendor-specific. + * (100BASE-T1 TC1 v1.0 6.1.1; 1000BASE-T1 TC12 v2.2 6.1.1). + * + * @peak_mse: The peak MSE value observed within the measurement window. + * For 100BASE-T1, "pMSE" is optional and may be implemented via a sliding + * 128-symbol window with periodic capture; not standardized for 1000BASE-T1. + * (100BASE-T1 TC1 v1.0 6.1.3, Table "DCQ.peakMSE"). + * + * @worst_peak_mse: A latched high-water mark of the peak MSE since last read + * (read-to-clear if implemented). OPEN Alliance shows a latched "worst case + * peak MSE" for 100BASE-T1 pMSE; availability/semantics outside that are + * vendor-specific. (100BASE-T1 TC1 v1.0 6.1.3, DCQ.peakMSE high byte; + * 1000BASE-T1 TC12 v2.2 treats DCQ details as vendor-specific.) + */ +struct phy_mse_snapshot { + u64 average_mse; + u64 peak_mse; + u64 worst_peak_mse; +}; + /** * struct phy_driver - Driver structure for a particular PHY type * @@ -1184,6 +1343,53 @@ struct phy_driver { /** @get_sqi_max: Get the maximum signal quality indication */ int (*get_sqi_max)(struct phy_device *dev); + /** + * @get_mse_capability: Get capabilities and scale of MSE measurement + * @dev: PHY device + * @cap: Output (filled on success) + * + * Fill @cap with the PHY's MSE capability for the current + * link mode: scale limits (max_average_mse, max_peak_mse), update + * interval (refresh_rate_ps), sample length (num_symbols) and the + * capability bitmask (supported_caps). + * + * Implementations may defer capability report until hardware has + * converged; in that case they should return -EAGAIN and allow the + * caller to retry later. + * + * Return: 0 on success. 
On failure, returns a negative errno code, such + * as -EOPNOTSUPP if MSE measurement is not supported by the PHY or in + * the current link mode, or -EAGAIN if the capability information is + * not yet available. + */ + int (*get_mse_capability)(struct phy_device *dev, + struct phy_mse_capability *cap); + + /** + * @get_mse_snapshot: Retrieve a snapshot of MSE diagnostic values + * @dev: PHY device + * @channel: Channel identifier (PHY_MSE_CHANNEL_*) + * @snapshot: Output (filled on success) + * + * Fill @snapshot with a correlated set of MSE values from the most + * recent measurement window. + * + * Callers must validate @channel against supported_caps returned by + * get_mse_capability(). Drivers must not coerce @channel; if the + * requested selector is not implemented by the device or current link + * mode, the operation must fail. + * + * worst_peak_mse is latched and must be treated as read-to-clear. + * + * Return: 0 on success. On failure, returns a negative errno code, such + * as -EOPNOTSUPP if MSE measurement is not supported by the PHY or in + * the current link mode, or -EAGAIN if measurements are not yet + * available. + */ + int (*get_mse_snapshot)(struct phy_device *dev, + enum phy_mse_channel channel, + struct phy_mse_snapshot *snapshot); + /* PLCA RS interface */ /** @get_plca_cfg: Return the current PLCA configuration */ int (*get_plca_cfg)(struct phy_device *dev, From e6e93fb01302e9b7a15d17f3b8a00eff8a601654 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 27 Oct 2025 13:27:59 +0100 Subject: [PATCH 504/867] ethtool: netlink: add ETHTOOL_MSG_MSE_GET and wire up PHY MSE access Introduce the userspace entry point for PHY MSE diagnostics via ethtool netlink. This exposes the core API added previously and returns both capability information and one or more snapshots. Userspace sends ETHTOOL_MSG_MSE_GET. 
The reply carries: - ETHTOOL_A_MSE_CAPABILITIES: scale limits and timing information - ETHTOOL_A_MSE_CHANNEL_* nests: one or more snapshots (per-channel if available, otherwise WORST, otherwise LINK) Link down returns -ENETDOWN. Changes: - YAML: add attribute sets (mse, mse-capabilities, mse-snapshot) and the mse-get operation - UAPI (generated): add ETHTOOL_A_MSE_* enums and message IDs, ETHTOOL_MSG_MSE_GET/REPLY - ethtool core: add net/ethtool/mse.c implementing the request, register genl op, and hook into ethnl dispatch - docs: document MSE_GET in ethtool-netlink.rst The include/uapi/linux/ethtool_netlink_generated.h is generated from Documentation/netlink/specs/ethtool.yaml. Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20251027122801.982364-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 86 +++++ Documentation/networking/ethtool-netlink.rst | 64 ++++ .../uapi/linux/ethtool_netlink_generated.h | 35 ++ net/ethtool/Makefile | 2 +- net/ethtool/mse.c | 329 ++++++++++++++++++ net/ethtool/netlink.c | 10 + net/ethtool/netlink.h | 2 + 7 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/mse.c diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 6a0fb19745138..05d2b6508b59c 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1823,6 +1823,73 @@ attribute-sets: type: uint enum: pse-event doc: List of events reported by the PSE controller + - + name: mse-capabilities + doc: MSE capabilities attribute set + attr-cnt-name: --ethtool-a-mse-capabilities-cnt + attributes: + - + name: max-average-mse + type: uint + - + name: max-peak-mse + type: uint + - + name: refresh-rate-ps + type: uint + - + name: num-symbols + type: uint + - + name: mse-snapshot + doc: MSE snapshot attribute set + attr-cnt-name: --ethtool-a-mse-snapshot-cnt + attributes: + - + name: average-mse + type: uint + 
- + name: peak-mse + type: uint + - + name: worst-peak-mse + type: uint + - + name: mse + attr-cnt-name: --ethtool-a-mse-cnt + attributes: + - + name: header + type: nest + nested-attributes: header + - + name: capabilities + type: nest + nested-attributes: mse-capabilities + - + name: channel-a + type: nest + nested-attributes: mse-snapshot + - + name: channel-b + type: nest + nested-attributes: mse-snapshot + - + name: channel-c + type: nest + nested-attributes: mse-snapshot + - + name: channel-d + type: nest + nested-attributes: mse-snapshot + - + name: worst-channel + type: nest + nested-attributes: mse-snapshot + - + name: link + type: nest + nested-attributes: mse-snapshot operations: enum-model: directional @@ -2756,6 +2823,25 @@ operations: attributes: - header - context + - + name: mse-get + doc: Get PHY MSE measurement data and capabilities. + attribute-set: mse + do: &mse-get-op + request: + attributes: + - header + reply: + attributes: + - header + - capabilities + - channel-a + - channel-b + - channel-c + - channel-d + - worst-channel + - link + dump: *mse-get-op mcast-groups: list: diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index b270886c5f5d5..af56c304cef43 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -242,6 +242,7 @@ Userspace to kernel: ``ETHTOOL_MSG_RSS_SET`` set RSS settings ``ETHTOOL_MSG_RSS_CREATE_ACT`` create an additional RSS context ``ETHTOOL_MSG_RSS_DELETE_ACT`` delete an additional RSS context + ``ETHTOOL_MSG_MSE_GET`` get MSE diagnostic data ===================================== ================================= Kernel to userspace: @@ -299,6 +300,7 @@ Kernel to userspace: ``ETHTOOL_MSG_RSS_CREATE_ACT_REPLY`` create an additional RSS context ``ETHTOOL_MSG_RSS_CREATE_NTF`` additional RSS context created ``ETHTOOL_MSG_RSS_DELETE_NTF`` additional RSS context deleted + ``ETHTOOL_MSG_MSE_GET_REPLY`` MSE diagnostic 
data ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2458,6 +2460,68 @@ Kernel response contents: For a description of each attribute, see ``TSCONFIG_GET``. +MSE_GET +======= + +Retrieves detailed Mean Square Error (MSE) diagnostic information from the PHY. + +Request Contents: + + ==================================== ====== ============================ + ``ETHTOOL_A_MSE_HEADER`` nested request header + ==================================== ====== ============================ + +Kernel Response Contents: + + ==================================== ====== ================================ + ``ETHTOOL_A_MSE_HEADER`` nested reply header + ``ETHTOOL_A_MSE_CAPABILITIES`` nested capability/scale info for MSE + measurements + ``ETHTOOL_A_MSE_CHANNEL_A`` nested snapshot for Channel A + ``ETHTOOL_A_MSE_CHANNEL_B`` nested snapshot for Channel B + ``ETHTOOL_A_MSE_CHANNEL_C`` nested snapshot for Channel C + ``ETHTOOL_A_MSE_CHANNEL_D`` nested snapshot for Channel D + ``ETHTOOL_A_MSE_WORST_CHANNEL`` nested snapshot for worst channel + ``ETHTOOL_A_MSE_LINK`` nested snapshot for link-wide aggregate + ==================================== ====== ================================ + +MSE Capabilities +---------------- + +This nested attribute reports the capability / scaling properties used to +interpret snapshot values. + + ============================================== ====== ========================= + ``ETHTOOL_A_MSE_CAPABILITIES_MAX_AVERAGE_MSE`` uint max avg_mse scale + ``ETHTOOL_A_MSE_CAPABILITIES_MAX_PEAK_MSE`` uint max peak_mse scale + ``ETHTOOL_A_MSE_CAPABILITIES_REFRESH_RATE_PS`` uint sample rate (picoseconds) + ``ETHTOOL_A_MSE_CAPABILITIES_NUM_SYMBOLS`` uint symbols per HW sample + ============================================== ====== ========================= + +The max-average/peak fields are included only if the corresponding metric +is supported by the PHY. 
Their absence indicates that the metric is not +available. + +See ``struct phy_mse_capability`` kernel documentation in +``include/linux/phy.h``. + +MSE Snapshot +------------ + +Each per-channel nest contains an atomic snapshot of MSE values for that +selector (channel A/B/C/D, worst channel, or link). + + ========================================== ====== =================== + ``ETHTOOL_A_MSE_SNAPSHOT_AVERAGE_MSE`` uint average MSE value + ``ETHTOOL_A_MSE_SNAPSHOT_PEAK_MSE`` uint current peak MSE + ``ETHTOOL_A_MSE_SNAPSHOT_WORST_PEAK_MSE`` uint worst-case peak MSE + ========================================== ====== =================== + +Within each channel nest, only the metrics supported by the PHY will be present. + +See ``struct phy_mse_snapshot`` kernel documentation in +``include/linux/phy.h``. + Request translation =================== diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 0e8ac0d974e20..b71b175df46df 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -803,6 +803,39 @@ enum { ETHTOOL_A_PSE_NTF_MAX = (__ETHTOOL_A_PSE_NTF_CNT - 1) }; +enum { + ETHTOOL_A_MSE_CAPABILITIES_MAX_AVERAGE_MSE = 1, + ETHTOOL_A_MSE_CAPABILITIES_MAX_PEAK_MSE, + ETHTOOL_A_MSE_CAPABILITIES_REFRESH_RATE_PS, + ETHTOOL_A_MSE_CAPABILITIES_NUM_SYMBOLS, + + __ETHTOOL_A_MSE_CAPABILITIES_CNT, + ETHTOOL_A_MSE_CAPABILITIES_MAX = (__ETHTOOL_A_MSE_CAPABILITIES_CNT - 1) +}; + +enum { + ETHTOOL_A_MSE_SNAPSHOT_AVERAGE_MSE = 1, + ETHTOOL_A_MSE_SNAPSHOT_PEAK_MSE, + ETHTOOL_A_MSE_SNAPSHOT_WORST_PEAK_MSE, + + __ETHTOOL_A_MSE_SNAPSHOT_CNT, + ETHTOOL_A_MSE_SNAPSHOT_MAX = (__ETHTOOL_A_MSE_SNAPSHOT_CNT - 1) +}; + +enum { + ETHTOOL_A_MSE_HEADER = 1, + ETHTOOL_A_MSE_CAPABILITIES, + ETHTOOL_A_MSE_CHANNEL_A, + ETHTOOL_A_MSE_CHANNEL_B, + ETHTOOL_A_MSE_CHANNEL_C, + ETHTOOL_A_MSE_CHANNEL_D, + ETHTOOL_A_MSE_WORST_CHANNEL, + ETHTOOL_A_MSE_LINK, + + __ETHTOOL_A_MSE_CNT, + 
ETHTOOL_A_MSE_MAX = (__ETHTOOL_A_MSE_CNT - 1) +}; + enum { ETHTOOL_MSG_USER_NONE = 0, ETHTOOL_MSG_STRSET_GET = 1, @@ -855,6 +888,7 @@ enum { ETHTOOL_MSG_RSS_SET, ETHTOOL_MSG_RSS_CREATE_ACT, ETHTOOL_MSG_RSS_DELETE_ACT, + ETHTOOL_MSG_MSE_GET, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -915,6 +949,7 @@ enum { ETHTOOL_MSG_RSS_CREATE_ACT_REPLY, ETHTOOL_MSG_RSS_CREATE_NTF, ETHTOOL_MSG_RSS_DELETE_NTF, + ETHTOOL_MSG_MSE_GET_REPLY, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 1e493553b9777..629c10916670e 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -9,4 +9,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o \ - phy.o tsconfig.o + phy.o tsconfig.o mse.o diff --git a/net/ethtool/mse.c b/net/ethtool/mse.c new file mode 100644 index 0000000000000..6aac004c3ffce --- /dev/null +++ b/net/ethtool/mse.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include "netlink.h" +#include "common.h" + +/* Channels A-D only; WORST and LINK are exclusive alternatives */ +#define PHY_MSE_CHANNEL_COUNT 4 + +struct mse_req_info { + struct ethnl_req_info base; +}; + +struct mse_snapshot_entry { + struct phy_mse_snapshot snapshot; + int channel; +}; + +struct mse_reply_data { + struct ethnl_reply_data base; + struct phy_mse_capability capability; + struct mse_snapshot_entry *snapshots; + unsigned int num_snapshots; +}; + +static struct mse_reply_data * +mse_repdata(const struct ethnl_reply_data *reply_base) +{ + return container_of(reply_base, struct mse_reply_data, base); +} + +const struct nla_policy ethnl_mse_get_policy[] = { + [ETHTOOL_A_MSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), +}; + 
+static int get_snapshot_if_supported(struct phy_device *phydev, + struct mse_reply_data *data, + unsigned int *idx, u32 cap_bit, + enum phy_mse_channel channel) +{ + int ret; + + if (data->capability.supported_caps & cap_bit) { + ret = phydev->drv->get_mse_snapshot(phydev, channel, + &data->snapshots[*idx].snapshot); + if (ret) + return ret; + data->snapshots[*idx].channel = channel; + (*idx)++; + } + + return 0; +} + +static int mse_get_channels(struct phy_device *phydev, + struct mse_reply_data *data) +{ + unsigned int i = 0; + int ret; + + if (!data->capability.supported_caps) + return 0; + + data->snapshots = kcalloc(PHY_MSE_CHANNEL_COUNT, + sizeof(*data->snapshots), GFP_KERNEL); + if (!data->snapshots) + return -ENOMEM; + + /* Priority 1: Individual channels */ + ret = get_snapshot_if_supported(phydev, data, &i, PHY_MSE_CAP_CHANNEL_A, + PHY_MSE_CHANNEL_A); + if (ret) + return ret; + ret = get_snapshot_if_supported(phydev, data, &i, PHY_MSE_CAP_CHANNEL_B, + PHY_MSE_CHANNEL_B); + if (ret) + return ret; + ret = get_snapshot_if_supported(phydev, data, &i, PHY_MSE_CAP_CHANNEL_C, + PHY_MSE_CHANNEL_C); + if (ret) + return ret; + ret = get_snapshot_if_supported(phydev, data, &i, PHY_MSE_CAP_CHANNEL_D, + PHY_MSE_CHANNEL_D); + if (ret) + return ret; + + /* If any individual channels were found, we are done. */ + if (i > 0) { + data->num_snapshots = i; + return 0; + } + + /* Priority 2: Worst channel, if no individual channels supported. */ + ret = get_snapshot_if_supported(phydev, data, &i, + PHY_MSE_CAP_WORST_CHANNEL, + PHY_MSE_CHANNEL_WORST); + if (ret) + return ret; + + /* If worst channel was found, we are done. */ + if (i > 0) { + data->num_snapshots = i; + return 0; + } + + /* Priority 3: Link-wide, if nothing else is supported. 
*/ + ret = get_snapshot_if_supported(phydev, data, &i, PHY_MSE_CAP_LINK, + PHY_MSE_CHANNEL_LINK); + if (ret) + return ret; + + data->num_snapshots = i; + return 0; +} + +static int mse_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + const struct genl_info *info) +{ + struct mse_reply_data *data = mse_repdata(reply_base); + struct net_device *dev = reply_base->dev; + struct phy_device *phydev; + int ret; + + phydev = ethnl_req_get_phydev(req_base, info->attrs, + ETHTOOL_A_MSE_HEADER, info->extack); + if (IS_ERR(phydev)) + return PTR_ERR(phydev); + if (!phydev) + return -EOPNOTSUPP; + + ret = ethnl_ops_begin(dev); + if (ret) + return ret; + + mutex_lock(&phydev->lock); + + if (!phydev->drv || !phydev->drv->get_mse_capability || + !phydev->drv->get_mse_snapshot) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + if (!phydev->link) { + ret = -ENETDOWN; + goto out_unlock; + } + + ret = phydev->drv->get_mse_capability(phydev, &data->capability); + if (ret) + goto out_unlock; + + ret = mse_get_channels(phydev, data); + +out_unlock: + mutex_unlock(&phydev->lock); + ethnl_ops_complete(dev); + if (ret) + kfree(data->snapshots); + return ret; +} + +static void mse_cleanup_data(struct ethnl_reply_data *reply_base) +{ + struct mse_reply_data *data = mse_repdata(reply_base); + + kfree(data->snapshots); +} + +static int mse_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct mse_reply_data *data = mse_repdata(reply_base); + size_t len = 0; + unsigned int i; + + /* ETHTOOL_A_MSE_CAPABILITIES */ + len += nla_total_size(0); + if (data->capability.supported_caps & PHY_MSE_CAP_AVG) + /* ETHTOOL_A_MSE_CAPABILITIES_MAX_AVERAGE_MSE */ + len += nla_total_size(sizeof(u64)); + if (data->capability.supported_caps & (PHY_MSE_CAP_PEAK | + PHY_MSE_CAP_WORST_PEAK)) + /* ETHTOOL_A_MSE_CAPABILITIES_MAX_PEAK_MSE */ + len += nla_total_size(sizeof(u64)); + /* 
ETHTOOL_A_MSE_CAPABILITIES_REFRESH_RATE_PS */ + len += nla_total_size(sizeof(u64)); + /* ETHTOOL_A_MSE_CAPABILITIES_NUM_SYMBOLS */ + len += nla_total_size(sizeof(u64)); + + for (i = 0; i < data->num_snapshots; i++) { + size_t snapshot_len = 0; + + /* Per-channel nest (e.g., ETHTOOL_A_MSE_CHANNEL_A / _B / _C / + * _D / _WORST_CHANNEL / _LINK) + */ + snapshot_len += nla_total_size(0); + + if (data->capability.supported_caps & PHY_MSE_CAP_AVG) + snapshot_len += nla_total_size(sizeof(u64)); + if (data->capability.supported_caps & PHY_MSE_CAP_PEAK) + snapshot_len += nla_total_size(sizeof(u64)); + if (data->capability.supported_caps & PHY_MSE_CAP_WORST_PEAK) + snapshot_len += nla_total_size(sizeof(u64)); + + len += snapshot_len; + } + + return len; +} + +static int mse_channel_to_attr(int ch) +{ + switch (ch) { + case PHY_MSE_CHANNEL_A: + return ETHTOOL_A_MSE_CHANNEL_A; + case PHY_MSE_CHANNEL_B: + return ETHTOOL_A_MSE_CHANNEL_B; + case PHY_MSE_CHANNEL_C: + return ETHTOOL_A_MSE_CHANNEL_C; + case PHY_MSE_CHANNEL_D: + return ETHTOOL_A_MSE_CHANNEL_D; + case PHY_MSE_CHANNEL_WORST: + return ETHTOOL_A_MSE_WORST_CHANNEL; + case PHY_MSE_CHANNEL_LINK: + return ETHTOOL_A_MSE_LINK; + default: + return -EINVAL; + } +} + +static int mse_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct mse_reply_data *data = mse_repdata(reply_base); + struct nlattr *nest; + unsigned int i; + int ret; + + nest = nla_nest_start(skb, ETHTOOL_A_MSE_CAPABILITIES); + if (!nest) + return -EMSGSIZE; + + if (data->capability.supported_caps & PHY_MSE_CAP_AVG) { + ret = nla_put_uint(skb, + ETHTOOL_A_MSE_CAPABILITIES_MAX_AVERAGE_MSE, + data->capability.max_average_mse); + if (ret < 0) + goto nla_put_nest_failure; + } + + if (data->capability.supported_caps & (PHY_MSE_CAP_PEAK | + PHY_MSE_CAP_WORST_PEAK)) { + ret = nla_put_uint(skb, ETHTOOL_A_MSE_CAPABILITIES_MAX_PEAK_MSE, + data->capability.max_peak_mse); + if (ret < 0) + goto 
nla_put_nest_failure; + } + + ret = nla_put_uint(skb, ETHTOOL_A_MSE_CAPABILITIES_REFRESH_RATE_PS, + data->capability.refresh_rate_ps); + if (ret < 0) + goto nla_put_nest_failure; + + ret = nla_put_uint(skb, ETHTOOL_A_MSE_CAPABILITIES_NUM_SYMBOLS, + data->capability.num_symbols); + if (ret < 0) + goto nla_put_nest_failure; + + nla_nest_end(skb, nest); + + for (i = 0; i < data->num_snapshots; i++) { + const struct mse_snapshot_entry *s = &data->snapshots[i]; + int chan_attr; + + chan_attr = mse_channel_to_attr(s->channel); + if (chan_attr < 0) + return chan_attr; + + nest = nla_nest_start(skb, chan_attr); + if (!nest) + return -EMSGSIZE; + + if (data->capability.supported_caps & PHY_MSE_CAP_AVG) { + ret = nla_put_uint(skb, + ETHTOOL_A_MSE_SNAPSHOT_AVERAGE_MSE, + s->snapshot.average_mse); + if (ret) + goto nla_put_nest_failure; + } + if (data->capability.supported_caps & PHY_MSE_CAP_PEAK) { + ret = nla_put_uint(skb, ETHTOOL_A_MSE_SNAPSHOT_PEAK_MSE, + s->snapshot.peak_mse); + if (ret) + goto nla_put_nest_failure; + } + if (data->capability.supported_caps & PHY_MSE_CAP_WORST_PEAK) { + ret = nla_put_uint(skb, + ETHTOOL_A_MSE_SNAPSHOT_WORST_PEAK_MSE, + s->snapshot.worst_peak_mse); + if (ret) + goto nla_put_nest_failure; + } + + nla_nest_end(skb, nest); + } + + return 0; + +nla_put_nest_failure: + nla_nest_cancel(skb, nest); + return ret; +} + +const struct ethnl_request_ops ethnl_mse_request_ops = { + .request_cmd = ETHTOOL_MSG_MSE_GET, + .reply_cmd = ETHTOOL_MSG_MSE_GET_REPLY, + .hdr_attr = ETHTOOL_A_MSE_HEADER, + .req_info_size = sizeof(struct mse_req_info), + .reply_data_size = sizeof(struct mse_reply_data), + + .prepare_data = mse_prepare_data, + .cleanup_data = mse_cleanup_data, + .reply_size = mse_reply_size, + .fill_reply = mse_fill_reply, +}; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 2f813f25f07e1..6e5f0f4f815a1 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -420,6 +420,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = 
{ [ETHTOOL_MSG_TSCONFIG_GET] = &ethnl_tsconfig_request_ops, [ETHTOOL_MSG_TSCONFIG_SET] = &ethnl_tsconfig_request_ops, [ETHTOOL_MSG_PHY_GET] = &ethnl_phy_request_ops, + [ETHTOOL_MSG_MSE_GET] = &ethnl_mse_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1534,6 +1535,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_rss_delete_policy, .maxattr = ARRAY_SIZE(ethnl_rss_delete_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_MSE_GET, + .doit = ethnl_default_doit, + .start = ethnl_perphy_start, + .dumpit = ethnl_perphy_dumpit, + .done = ethnl_perphy_done, + .policy = ethnl_mse_get_policy, + .maxattr = ARRAY_SIZE(ethnl_mse_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 1d4f9ecb3d263..89010eaa67dfc 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -442,6 +442,7 @@ extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; extern const struct ethnl_request_ops ethnl_phy_request_ops; extern const struct ethnl_request_ops ethnl_tsconfig_request_ops; +extern const struct ethnl_request_ops ethnl_mse_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -497,6 +498,7 @@ extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1]; extern const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1]; extern const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1]; +extern const struct nla_policy ethnl_mse_get_policy[ETHTOOL_A_MSE_HEADER + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info 
*info); From 335a9660e141349d7751b3b880d7531ea401a8db Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 27 Oct 2025 13:28:00 +0100 Subject: [PATCH 505/867] net: phy: micrel: add MSE interface support for KSZ9477 family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the get_mse_capability() and get_mse_snapshot() PHY driver ops for KSZ9477-series integrated PHYs to demonstrate the new PHY MSE UAPI. These PHYs do not expose a documented direct MSE register, but the Signal Quality Indicator (SQI) registers are derived from the internal MSE computation. This hook maps SQI readings into the MSE interface so that tooling can retrieve the raw value together with metadata for correct interpretation in userspace. Behaviour: - For 1000BASE-T, report per-channel (A–D) values and support a WORST channel selector. - For 100BASE-TX, only LINK-wide measurements are available. - Report average MSE only, with a max scale based on KSZ9477_MMD_SQI_MASK and a fixed refresh rate of 2 µs. This mapping differs from the OPEN Alliance SQI definition, which assigns thresholds such as pre-fail indices; the MSE interface instead provides the raw measurement, leaving interpretation to userspace. 
Signed-off-by: Oleksij Rempel Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251027122801.982364-4-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 102 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index edca0024b7c73..06080b1c753d0 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2325,6 +2325,106 @@ static int kszphy_get_sqi_max(struct phy_device *phydev) return KSZ9477_SQI_MAX; } +static int kszphy_get_mse_capability(struct phy_device *phydev, + struct phy_mse_capability *cap) +{ + /* Capabilities depend on link mode: + * - 1000BASE-T: per-pair SQI registers exist => expose A..D + * and a WORST selector. + * - 100BASE-TX: HW provides a single MSE/SQI reading in the "channel A" + * register, but with auto MDI-X there is no MDI-X resolution bit, + * so we cannot map that register to a specific wire pair reliably. + * To avoid misleading per-channel data, advertise only LINK. + * Other speeds: no MSE exposure via this driver. + * + * Note: WORST is *not* a hardware selector on this family. + * We expose it because the driver computes it in software + * by scanning per-channel readouts (A..D) and picking the + * maximum average MSE. 
+ */ + if (phydev->speed == SPEED_1000) + cap->supported_caps = PHY_MSE_CAP_CHANNEL_A | + PHY_MSE_CAP_CHANNEL_B | + PHY_MSE_CAP_CHANNEL_C | + PHY_MSE_CAP_CHANNEL_D | + PHY_MSE_CAP_WORST_CHANNEL; + else if (phydev->speed == SPEED_100) + cap->supported_caps = PHY_MSE_CAP_LINK; + else + return -EOPNOTSUPP; + + cap->max_average_mse = FIELD_MAX(KSZ9477_MMD_SQI_MASK); + cap->refresh_rate_ps = 2000000; /* 2 us */ + /* Estimated from link modulation (125 MBd per channel) and documented + * refresh rate of 2 us + */ + cap->num_symbols = 250; + + cap->supported_caps |= PHY_MSE_CAP_AVG; + + return 0; +} + +static int kszphy_get_mse_snapshot(struct phy_device *phydev, + enum phy_mse_channel channel, + struct phy_mse_snapshot *snapshot) +{ + u8 num_channels; + int ret; + + if (phydev->speed == SPEED_1000) + num_channels = 4; + else if (phydev->speed == SPEED_100) + num_channels = 1; + else + return -EOPNOTSUPP; + + if (channel == PHY_MSE_CHANNEL_WORST) { + u32 worst_val = 0; + int i; + + /* WORST is implemented in software: select the maximum + * average MSE across the available per-channel registers. + * Only defined when multiple channels exist (1000BASE-T). + */ + if (num_channels < 2) + return -EOPNOTSUPP; + + for (i = 0; i < num_channels; i++) { + ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + KSZ9477_MMD_SIGNAL_QUALITY_CHAN_A + i); + if (ret < 0) + return ret; + + ret = FIELD_GET(KSZ9477_MMD_SQI_MASK, ret); + if (ret > worst_val) + worst_val = ret; + } + snapshot->average_mse = worst_val; + } else if (channel == PHY_MSE_CHANNEL_LINK && num_channels == 1) { + ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + KSZ9477_MMD_SIGNAL_QUALITY_CHAN_A); + if (ret < 0) + return ret; + snapshot->average_mse = FIELD_GET(KSZ9477_MMD_SQI_MASK, ret); + } else if (channel >= PHY_MSE_CHANNEL_A && + channel <= PHY_MSE_CHANNEL_D) { + /* Per-channel readouts are valid only for 1000BASE-T. 
*/ + if (phydev->speed != SPEED_1000) + return -EOPNOTSUPP; + + ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + KSZ9477_MMD_SIGNAL_QUALITY_CHAN_A + channel); + if (ret < 0) + return ret; + snapshot->average_mse = FIELD_GET(KSZ9477_MMD_SQI_MASK, ret); + } else { + return -EOPNOTSUPP; + } + + return 0; +} + static void kszphy_enable_clk(struct phy_device *phydev) { struct kszphy_priv *priv = phydev->priv; @@ -6497,6 +6597,8 @@ static struct phy_driver ksphy_driver[] = { .cable_test_get_status = ksz9x31_cable_test_get_status, .get_sqi = kszphy_get_sqi, .get_sqi_max = kszphy_get_sqi_max, + .get_mse_capability = kszphy_get_mse_capability, + .get_mse_snapshot = kszphy_get_mse_snapshot, } }; module_phy_driver(ksphy_driver); From fd93ed77efe4735cd2b9a3fbccd5e199ced19bba Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 27 Oct 2025 13:28:01 +0100 Subject: [PATCH 506/867] net: phy: dp83td510: add MSE interface support for 10BASE-T1L Implement get_mse_capability() and get_mse_snapshot() for the DP83TD510E to expose its Mean Square Error (MSE) register via the new PHY MSE UAPI. The DP83TD510E does not document any peak MSE values; it only exposes a single average MSE register used internally to derive SQI. This implementation therefore advertises only PHY_MSE_CAP_AVG, along with LINK and channel-A selectors. Scaling is fixed to 0xFFFF, and the refresh interval/number of symbols are estimated from 10BASE-T1L symbol rate (7.5 MBd) and typical diagnostic intervals (~1 ms). For 10BASE-T1L deployments, SQI is a reliable indicator of link modulation quality once the link is established, but it does not indicate whether autonegotiation pulses will be correctly received in marginal conditions. MSE provides a direct measurement of slicer error rate that can be used to evaluate if autonegotiation is likely to succeed under a given cable length and condition. 
In practice, testing such scenarios often requires forcing a fixed-link setup to isolate MSE behaviour from the autonegotiation process. Signed-off-by: Oleksij Rempel Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251027122801.982364-5-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83td510.c | 62 +++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c index 23af1ac194fa9..d75dae6071ad6 100644 --- a/drivers/net/phy/dp83td510.c +++ b/drivers/net/phy/dp83td510.c @@ -61,6 +61,7 @@ #define DP83TD510E_MASTER_SLAVE_RESOL_FAIL BIT(15) #define DP83TD510E_MSE_DETECT 0xa85 +#define DP83TD510E_MSE_MAX U16_MAX #define DP83TD510_SQI_MAX 7 @@ -249,6 +250,64 @@ struct dp83td510_priv { #define DP83TD510E_ALCD_COMPLETE BIT(15) #define DP83TD510E_ALCD_CABLE_LENGTH GENMASK(10, 0) +static int dp83td510_get_mse_capability(struct phy_device *phydev, + struct phy_mse_capability *cap) +{ + /* DP83TD510E documents only a single (average) MSE register + * (used to derive SQI); no peak or worst-peak counters are + * described. Advertise only PHY_MSE_CAP_AVG. + */ + cap->supported_caps = PHY_MSE_CAP_AVG; + /* 10BASE-T1L is a single-pair medium, so there are no B/C/D channels. + * We still advertise PHY_MSE_CAP_CHANNEL_A to indicate that the PHY + * can attribute the measurement to a specific pair (the only one), + * rather than exposing it only as a link-aggregate. + * + * Rationale: + * - Keeps the ethtool MSE_GET selection logic consistent: per-channel + * (A/B/C/D) is preferred over WORST/LINK, so userspace receives a + * CHANNEL_A nest instead of LINK. + * - Signals to tools that "per-pair" data is available (even if there's + * just one pair), avoiding the impression that only aggregate values + * are supported. + * - Remains compatible with multi-pair PHYs and uniform UI handling. + * + * Note: WORST and other channels are not advertised on 10BASE-T1L. 
+ */ + cap->supported_caps |= PHY_MSE_CAP_CHANNEL_A | PHY_MSE_CAP_LINK; + cap->max_average_mse = DP83TD510E_MSE_MAX; + + /* The datasheet does not specify the refresh rate or symbol count, + * but based on similar PHYs and standards, we can assume a common + * value. For 10BASE-T1L, the symbol rate is 7.5 MBd. A common + * diagnostic interval is around 1ms. + * 7.5e6 symbols/sec * 0.001 sec = 7500 symbols. + */ + cap->refresh_rate_ps = 1000000000; /* 1 ms */ + cap->num_symbols = 7500; + + return 0; +} + +static int dp83td510_get_mse_snapshot(struct phy_device *phydev, + enum phy_mse_channel channel, + struct phy_mse_snapshot *snapshot) +{ + int ret; + + if (channel != PHY_MSE_CHANNEL_LINK && + channel != PHY_MSE_CHANNEL_A) + return -EOPNOTSUPP; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_MSE_DETECT); + if (ret < 0) + return ret; + + snapshot->average_mse = ret; + + return 0; +} + static int dp83td510_led_brightness_set(struct phy_device *phydev, u8 index, enum led_brightness brightness) { @@ -893,6 +952,9 @@ static struct phy_driver dp83td510_driver[] = { .get_phy_stats = dp83td510_get_phy_stats, .update_stats = dp83td510_update_stats, + .get_mse_capability = dp83td510_get_mse_capability, + .get_mse_snapshot = dp83td510_get_mse_snapshot, + .led_brightness_set = dp83td510_led_brightness_set, .led_hw_is_supported = dp83td510_led_hw_is_supported, .led_hw_control_set = dp83td510_led_hw_control_set, From 27cb3de7f43ac0263474d87a2c84d96f904d73e2 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 28 Oct 2025 12:32:44 +0800 Subject: [PATCH 507/867] net: add net cookie for net device trace events In a multi-network card or container environment, this is needed in order to differentiate between trace events relating to net devices that exist in different network namespaces and share the same name. for xmit_timeout trace events: [002] ..s1. 1838.311662: net_dev_xmit_timeout: dev=eth0 driver=virtio_net queue=10 net_cookie=3 [007] ..s1. 
1839.335650: net_dev_xmit_timeout: dev=eth0 driver=virtio_net queue=10 net_cookie=4100 [007] ..s1. 1844.455659: net_dev_xmit_timeout: dev=eth0 driver=virtio_net queue=10 net_cookie=3 [002] ..s1. 1850.087647: net_dev_xmit_timeout: dev=eth0 driver=virtio_net queue=10 net_cookie=3 Cc: Eran Ben Elisha Cc: Jiri Pirko Cc: Cong Wang Cc: Jakub Kicinski Cc: Eric Dumazet Cc: Simon Horman Cc: Paolo Abeni Suggested-by: Ido Schimmel Signed-off-by: Tonghao Zhang Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251028043244.82288-1-tonghao@bamaicloud.com Signed-off-by: Paolo Abeni --- include/trace/events/net.h | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index d55162c12f90a..fdd9ad474ce3a 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -35,6 +35,7 @@ TRACE_EVENT(net_dev_start_xmit, __field( u16, gso_size ) __field( u16, gso_segs ) __field( u16, gso_type ) + __field( u64, net_cookie ) ), TP_fast_assign( @@ -57,16 +58,18 @@ TRACE_EVENT(net_dev_start_xmit, __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_segs = skb_shinfo(skb)->gso_segs; __entry->gso_type = skb_shinfo(skb)->gso_type; + __entry->net_cookie = dev_net(dev)->net_cookie; ), - TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", + TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x net_cookie=%llu", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, 
__entry->len, __entry->data_len, __entry->network_offset, __entry->transport_offset_valid, __entry->transport_offset, __entry->tx_flags, - __entry->gso_size, __entry->gso_segs, __entry->gso_type) + __entry->gso_size, __entry->gso_segs, + __entry->gso_type, __entry->net_cookie) ); TRACE_EVENT(net_dev_xmit, @@ -83,17 +86,21 @@ TRACE_EVENT(net_dev_xmit, __field( unsigned int, len ) __field( int, rc ) __string( name, dev->name ) + __field( u64, net_cookie ) ), TP_fast_assign( __entry->skbaddr = skb; __entry->len = skb_len; __entry->rc = rc; + __entry->net_cookie = dev_net(dev)->net_cookie; __assign_str(name); ), - TP_printk("dev=%s skbaddr=%p len=%u rc=%d", - __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) + TP_printk("dev=%s skbaddr=%p len=%u rc=%d net_cookie=%llu", + __get_str(name), __entry->skbaddr, + __entry->len, __entry->rc, + __entry->net_cookie) ); TRACE_EVENT(net_dev_xmit_timeout, @@ -107,16 +114,19 @@ TRACE_EVENT(net_dev_xmit_timeout, __string( name, dev->name ) __string( driver, netdev_drivername(dev)) __field( int, queue_index ) + __field( u64, net_cookie ) ), TP_fast_assign( __assign_str(name); __assign_str(driver); __entry->queue_index = queue_index; + __entry->net_cookie = dev_net(dev)->net_cookie; ), - TP_printk("dev=%s driver=%s queue=%d", - __get_str(name), __get_str(driver), __entry->queue_index) + TP_printk("dev=%s driver=%s queue=%d net_cookie=%llu", + __get_str(name), __get_str(driver), + __entry->queue_index, __entry->net_cookie) ); DECLARE_EVENT_CLASS(net_dev_template, @@ -129,16 +139,20 @@ DECLARE_EVENT_CLASS(net_dev_template, __field( void *, skbaddr ) __field( unsigned int, len ) __string( name, skb->dev->name ) + __field( u64, net_cookie ) ), TP_fast_assign( __entry->skbaddr = skb; __entry->len = skb->len; + __entry->net_cookie = dev_net(skb->dev)->net_cookie; __assign_str(name); ), - TP_printk("dev=%s skbaddr=%p len=%u", - __get_str(name), __entry->skbaddr, __entry->len) + TP_printk("dev=%s skbaddr=%p len=%u net_cookie=%llu", 
+ __get_str(name), __entry->skbaddr, + __entry->len, + __entry->net_cookie) ) DEFINE_EVENT(net_dev_template, net_dev_queue, @@ -188,6 +202,7 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __field( unsigned char, nr_frags ) __field( u16, gso_size ) __field( u16, gso_type ) + __field( u64, net_cookie ) ), TP_fast_assign( @@ -214,16 +229,18 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->nr_frags = skb_shinfo(skb)->nr_frags; __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_type = skb_shinfo(skb)->gso_type; + __entry->net_cookie = dev_net(skb->dev)->net_cookie; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x net_cookie=%llu", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->hash, __entry->l4_hash, __entry->len, __entry->data_len, __entry->truesize, __entry->mac_header_valid, __entry->mac_header, - __entry->nr_frags, __entry->gso_size, __entry->gso_type) + __entry->nr_frags, __entry->gso_size, + __entry->gso_type, __entry->net_cookie) ); DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_frags_entry, From 462280043466b2bc74483c56a5d5316ff6b16380 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 30 Oct 2025 08:06:45 +0800 Subject: [PATCH 508/867] xsk: do not enable/disable irq when grabbing/releasing xsk_tx_list_lock The commit ac98d8aab61b ("xsk: wire upp Tx zero-copy functions") originally introducing this lock put the deletion 
process in the sk_destruct which can run in irq context obviously, so the xxx_irqsave()/xxx_irqrestore() pair was used. But later another commit 541d7fdd7694 ("xsk: proper AF_XDP socket teardown ordering") moved the deletion into xsk_release() that only happens in process context. It means that since this commit, it doesn't necessarily need that pair. Now, there are two places that use this xsk_tx_list_lock and only run in the process context. So avoid manipulating the irq then. Signed-off-by: Jason Xing Acked-by: Maciej Fijalkowski Link: https://patch.msgid.link/20251030000646.18859-2-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- net/xdp/xsk_buff_pool.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index aa9788f20d0db..309075050b2a0 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -12,26 +12,22 @@ void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { - unsigned long flags; - if (!xs->tx) return; - spin_lock_irqsave(&pool->xsk_tx_list_lock, flags); + spin_lock(&pool->xsk_tx_list_lock); list_add_rcu(&xs->tx_list, &pool->xsk_tx_list); - spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags); + spin_unlock(&pool->xsk_tx_list_lock); } void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { - unsigned long flags; - if (!xs->tx) return; - spin_lock_irqsave(&pool->xsk_tx_list_lock, flags); + spin_lock(&pool->xsk_tx_list_lock); list_del_rcu(&xs->tx_list); - spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags); + spin_unlock(&pool->xsk_tx_list_lock); } void xp_destroy(struct xsk_buff_pool *pool) From 30ed05adca4a05c50594384cff18910858dd1d35 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 30 Oct 2025 08:06:46 +0800 Subject: [PATCH 509/867] xsk: use a smaller new lock for shared pool case - Split cq_lock into two smaller locks: cq_prod_lock and cq_cached_prod_lock - Avoid disabling/enabling interrupts in the hot xmit path In either 
xsk_cq_cancel_locked() or xsk_cq_reserve_locked() function, the race condition is only between multiple xsks sharing the same pool. They are all in the process context rather than interrupt context, so now the small lock named cq_cached_prod_lock can be used without handling interrupts. While cq_cached_prod_lock ensures the exclusive modification of @cached_prod, cq_prod_lock in xsk_cq_submit_addr_locked() only cares about @producer and corresponding @desc. Both of them don't necessarily be consistent with @cached_prod protected by cq_cached_prod_lock. That's the reason why the previous big lock can be split into two smaller ones. Please note that SPSC rule is all about the global state of producer and consumer that can affect both layers instead of local or cached ones. Frequently disabling and enabling interrupt are very time consuming in some cases, especially in a per-descriptor granularity, which now can be avoided after this optimization, even when the pool is shared by multiple xsks. With this patch, the performance number[1] could go from 1,872,565 pps to 1,961,009 pps. It's a minor rise of around 5%. [1]: taskset -c 1 ./xdpsock -i enp2s0f1 -q 0 -t -S -s 64 Signed-off-by: Jason Xing Acked-by: Maciej Fijalkowski Link: https://patch.msgid.link/20251030000646.18859-3-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- include/net/xsk_buff_pool.h | 13 +++++++++---- net/xdp/xsk.c | 15 ++++++--------- net/xdp/xsk_buff_pool.c | 3 ++- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index cac56e6b0869b..92a2358c6ce34 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -85,11 +85,16 @@ struct xsk_buff_pool { bool unaligned; bool tx_sw_csum; void *addrs; - /* Mutual exclusion of the completion ring in the SKB mode. 
Two cases to protect: - * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when - * sockets share a single cq when the same netdev and queue id is shared. + /* Mutual exclusion of the completion ring in the SKB mode. + * Protect: NAPI TX thread and sendmsg error paths in the SKB + * destructor callback. */ - spinlock_t cq_lock; + spinlock_t cq_prod_lock; + /* Mutual exclusion of the completion ring in the SKB mode. + * Protect: when sockets share a single cq when the same netdev + * and queue id is shared. + */ + spinlock_t cq_cached_prod_lock; struct xdp_buff_xsk *free_heads[]; }; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7b0c68a70888c..2f26c918d4484 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -548,12 +548,11 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags) static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool) { - unsigned long flags; int ret; - spin_lock_irqsave(&pool->cq_lock, flags); + spin_lock(&pool->cq_cached_prod_lock); ret = xskq_prod_reserve(pool->cq); - spin_unlock_irqrestore(&pool->cq_lock, flags); + spin_unlock(&pool->cq_cached_prod_lock); return ret; } @@ -566,7 +565,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, unsigned long flags; u32 idx; - spin_lock_irqsave(&pool->cq_lock, flags); + spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); xskq_prod_write_addr(pool->cq, idx, @@ -583,16 +582,14 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, } } xskq_prod_submit_n(pool->cq, descs_processed); - spin_unlock_irqrestore(&pool->cq_lock, flags); + spin_unlock_irqrestore(&pool->cq_prod_lock, flags); } static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n) { - unsigned long flags; - - spin_lock_irqsave(&pool->cq_lock, flags); + spin_lock(&pool->cq_cached_prod_lock); xskq_prod_cancel_n(pool->cq, n); - spin_unlock_irqrestore(&pool->cq_lock, flags); + spin_unlock(&pool->cq_cached_prod_lock); } static void xsk_inc_num_desc(struct 
sk_buff *skb) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 309075050b2a0..00a4eddaa0cd6 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -90,7 +90,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); spin_lock_init(&pool->xsk_tx_list_lock); - spin_lock_init(&pool->cq_lock); + spin_lock_init(&pool->cq_prod_lock); + spin_lock_init(&pool->cq_cached_prod_lock); refcount_set(&pool->users, 1); pool->fq = xs->fq_tmp; From 789521b4717fd6bd85164ba5c131f621a79c9736 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 1 Nov 2025 10:40:11 +0100 Subject: [PATCH 510/867] rust: kbuild: support `-Cjump-tables=n` for Rust 1.93.0 Rust 1.93.0 (expected 2026-01-22) is stabilizing `-Zno-jump-tables` [1][2] as `-Cjump-tables=n` [3]. Without this change, one would eventually see: RUSTC L rust/core.o error: unknown unstable option: `no-jump-tables` Thus support the upcoming version. 
Link: https://github.com/rust-lang/rust/issues/116592 [1] Link: https://github.com/rust-lang/rust/pull/105812 [2] Link: https://github.com/rust-lang/rust/pull/145974 [3] Reviewed-by: Alice Ryhl Reviewed-by: Trevor Gross Acked-by: Nicolas Schier Link: https://patch.msgid.link/20251101094011.1024534-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- arch/loongarch/Makefile | 2 +- arch/x86/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index dc5bd3f1b8d2c..96ca1a688984e 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -109,7 +109,7 @@ endif ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP KBUILD_RUSTFLAGS += -Cllvm-args=--loongarch-annotate-tablejump else -KBUILD_RUSTFLAGS += -Zno-jump-tables # keep compatibility with older compilers +KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) # keep compatibility with older compilers endif ifdef CONFIG_LTO_CLANG # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8fbff3106c56e..1a27efcf3c205 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -98,7 +98,7 @@ ifeq ($(CONFIG_X86_KERNEL_IBT),y) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 # KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) -KBUILD_RUSTFLAGS += -Zcf-protection=branch -Zno-jump-tables +KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif From 105bae321862b3bac300c73748192ff61a5129cd Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Mon, 3 Nov 2025 16:40:04 +0100 Subject: [PATCH 511/867] rtnetlink: honor RTEXT_FILTER_SKIP_STATS in IFLA_STATS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gathering interface statistics can be a relatively expensive operation on certain systems as it requires iterating over all the cpus. RTEXT_FILTER_SKIP_STATS was first introduced [1] to skip AF_INET6 statistics from interface dumps and it was then extended [2] to also exclude IFLA_VF_INFO. The semantics of the flag does not seem to be limited to AF_INET or VF statistics and having a way to query the interface status (e.g: carrier, address) without retrieving its statistics seems reasonable. So this patch extends the use RTEXT_FILTER_SKIP_STATS to also affect IFLA_STATS. 
[1] https://lore.kernel.org/all/20150911204848.GC9687@oracle.com/ [2] https://lore.kernel.org/all/20230611105108.122586-1-gal@nvidia.com/ Signed-off-by: Adrian Moreno Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Eric Dumazet Reviewed-by: Nicolas Dichtel Link: https://patch.msgid.link/20251103154006.1189707-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 576d5ec3bb364..b1ed55141d8a7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1270,13 +1270,13 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { - return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + size_t size; + + size = NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap)) - + nla_total_size(sizeof(struct rtnl_link_stats)) - + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ @@ -1329,6 +1329,12 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(2) /* IFLA_HEADROOM */ + nla_total_size(2) /* IFLA_TAILROOM */ + 0; + + if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) + size += nla_total_size(sizeof(struct rtnl_link_stats)) + + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); + + return size; } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -2123,7 +2129,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; - if (rtnl_fill_stats(skb, dev)) + if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS) && 
+ rtnl_fill_stats(skb, dev)) goto nla_put_failure; if (rtnl_fill_vf(skb, dev, ext_filter_mask)) From 46173144e03d87beddf02ee785cbdf818087687a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Nov 2025 16:52:56 +0000 Subject: [PATCH 512/867] net: mark deliver_skb() as unlikely and not inlined deliver_skb() should not be inlined as it is not called in the fast path. Add unlikely() clauses giving hints to the compiler about this fact. Before this patch: size net/core/dev.o text data bss dec hex filename 121794 13330 176 135300 21084 net/core/dev.o __netif_receive_skb_core() size on x86_64 : 4080 bytes. After: size net/core/dev.o text data bss dec hex filename 120330 13338 176 133844 20ad4 net/core/dev.o __netif_receive_skb_core() size on x86_64 : 2781 bytes. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20251103165256.1712169-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 2c1de5fb97d93..ba39146bbd25f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2463,9 +2463,9 @@ int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb) return __dev_forward_skb2(dev, skb, false) ?: netif_rx_internal(skb); } -static inline int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, - struct net_device *orig_dev) +static int deliver_skb(struct sk_buff *skb, + struct packet_type *pt_prev, + struct net_device *orig_dev) { if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) return -ENOMEM; @@ -2484,7 +2484,7 @@ static inline void deliver_ptype_list_skb(struct sk_buff *skb, list_for_each_entry_rcu(ptype, ptype_list, list) { if (ptype->type != type) continue; - if (pt_prev) + if (unlikely(pt_prev)) deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } @@ -2545,7 +2545,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if (skb_loop_sk(ptype, skb)) continue; - if 
(pt_prev) { + if (unlikely(pt_prev)) { deliver_skb(skb2, pt_prev, skb->dev); pt_prev = ptype; continue; @@ -4421,7 +4421,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, return skb; bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); - if (*pt_prev) { + if (unlikely(*pt_prev)) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } @@ -5883,7 +5883,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, if (nf_hook_ingress_active(skb)) { int ingress_retval; - if (*pt_prev) { + if (unlikely(*pt_prev)) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } @@ -5960,13 +5960,13 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all, list) { - if (pt_prev) + if (unlikely(pt_prev)) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { - if (pt_prev) + if (unlikely(pt_prev)) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } @@ -5997,7 +5997,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, } if (skb_vlan_tag_present(skb)) { - if (pt_prev) { + if (unlikely(pt_prev)) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } @@ -6009,7 +6009,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { - if (pt_prev) { + if (unlikely(pt_prev)) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } From dec568a36f9b16f0334aed8e95ec4225606830cc Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:49:55 +0000 Subject: [PATCH 513/867] net: stmmac: imx: use phylink's interface mode for set_clk_tx_rate() imx_dwmac_set_clk_tx_rate() is passed the interface mode from phylink which will be the same as plat_dat->phy_interface. 
Use the passed-in interface mode rather than plat_dat->phy_interface. Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt4N-0000000ChoM-1llp@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 4268b99872372..147fa08d5b6e7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -195,9 +195,6 @@ static void imx_dwmac_exit(struct platform_device *pdev, void *priv) static int imx_dwmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, phy_interface_t interface, int speed) { - struct imx_priv_data *dwmac = bsp_priv; - - interface = dwmac->plat_dat->phy_interface; if (interface == PHY_INTERFACE_MODE_RMII || interface == PHY_INTERFACE_MODE_MII) return 0; From 553f23d1953527eb277efa902cd498131b2527e1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:00 +0000 Subject: [PATCH 514/867] net: stmmac: s32: move PHY_INTF_SEL_x definitions out of the way S32's PHY_INTF_SEL_x definitions conflict with those for the dwmac cores as they use a different bitmapping. Add a S32 prefix so that they are unique. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Reviewed-by: Jan Petrous (OSS) Link: https://patch.msgid.link/E1vFt4S-0000000ChoS-2Ahi@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c index ee095ac132037..2b7ad64bfdf74 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -24,10 +24,10 @@ #define GMAC_INTF_RATE_125M 125000000 /* 125MHz */ /* SoC PHY interface control register */ -#define PHY_INTF_SEL_MII 0x00 -#define PHY_INTF_SEL_SGMII 0x01 -#define PHY_INTF_SEL_RGMII 0x02 -#define PHY_INTF_SEL_RMII 0x08 +#define S32_PHY_INTF_SEL_MII 0x00 +#define S32_PHY_INTF_SEL_SGMII 0x01 +#define S32_PHY_INTF_SEL_RGMII 0x02 +#define S32_PHY_INTF_SEL_RMII 0x08 struct s32_priv_data { void __iomem *ioaddr; @@ -40,7 +40,7 @@ struct s32_priv_data { static int s32_gmac_write_phy_intf_select(struct s32_priv_data *gmac) { - writel(PHY_INTF_SEL_RGMII, gmac->ctrl_sts); + writel(S32_PHY_INTF_SEL_RGMII, gmac->ctrl_sts); dev_dbg(gmac->dev, "PHY mode set to %s\n", phy_modes(*gmac->intf_mode)); From 4a4692e9091867dd413764c7d81f09e8109a233a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:05 +0000 Subject: [PATCH 515/867] net: stmmac: add phy_intf_sel and ACTPHYIF definitions Add definitions for the active PHY interface found in DMA hardware feature register 0, and also used to configure the core in multi- interface designs via phy_intf_sel. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1vFt4X-0000000ChoY-30p9@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 27083af545683..7395bbb94aeaa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -313,6 +313,16 @@ struct stmmac_safety_stats { #define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY iface */ #define DEFAULT_DMA_PBL 8 +/* phy_intf_sel_i and ACTPHYIF encodings */ +#define PHY_INTF_SEL_GMII_MII 0 +#define PHY_INTF_SEL_RGMII 1 +#define PHY_INTF_SEL_SGMII 2 +#define PHY_INTF_SEL_TBI 3 +#define PHY_INTF_SEL_RMII 4 +#define PHY_INTF_SEL_RTBI 5 +#define PHY_INTF_SEL_SMII 6 +#define PHY_INTF_SEL_REVMII 7 + /* MSI defines */ #define STMMAC_MSI_VEC_MAX 32 From b459790d3fd6d7ead31182ae0cd8632fe79deed6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:10 +0000 Subject: [PATCH 516/867] net: stmmac: add stmmac_get_phy_intf_sel() Provide a function to translate the PHY interface mode to the phy_intf_sel pin configuration for dwmac1000 and dwmac4 cores that support multiple interfaces. We currently handle MII, GMII, RGMII, SGMII, RMII and REVMII, but not TBI, RTBI nor SMII as drivers do not appear to use these three and the driver doesn't currently support these. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt4c-0000000Choe-3SII@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 3ea680cc63d81..0ea74c88a779f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -396,6 +396,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_xdp_open(struct net_device *dev); void stmmac_xdp_release(struct net_device *dev); +int stmmac_get_phy_intf_sel(phy_interface_t interface); int stmmac_resume(struct device *dev); int stmmac_suspend(struct device *dev); void stmmac_dvr_remove(struct device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c2a783c8022d9..6d4323d045732 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3082,6 +3082,26 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv) } } +int stmmac_get_phy_intf_sel(phy_interface_t interface) +{ + int phy_intf_sel = -EINVAL; + + if (interface == PHY_INTERFACE_MODE_MII || + interface == PHY_INTERFACE_MODE_GMII) + phy_intf_sel = PHY_INTF_SEL_GMII_MII; + else if (phy_interface_mode_is_rgmii(interface)) + phy_intf_sel = PHY_INTF_SEL_RGMII; + else if (interface == PHY_INTERFACE_MODE_SGMII) + phy_intf_sel = PHY_INTF_SEL_SGMII; + else if (interface == PHY_INTERFACE_MODE_RMII) + phy_intf_sel = PHY_INTF_SEL_RMII; + else if (interface == PHY_INTERFACE_MODE_REVMII) + phy_intf_sel = PHY_INTF_SEL_REVMII; + + return phy_intf_sel; +} +EXPORT_SYMBOL_GPL(stmmac_get_phy_intf_sel); + /** * stmmac_init_dma_engine - DMA init. 
* @priv: driver private structure From 1b6aa81c85621d6b55099906585ff09a477203b8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:15 +0000 Subject: [PATCH 517/867] net: stmmac: add support for configuring the phy_intf_sel inputs When dwmac is synthesised with support for multiple PHY interfaces, the core provides phy_intf_sel inputs, sampled on reset, to configure the PHY facing interface. Use stmmac_get_phy_intf_sel() in core code to determine the dwmac phy_intf_sel input value, and provide a new platform method called with this value just before we issue a soft reset to the dwmac core. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt4h-0000000Chos-3wxX@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 34 +++++++++++++++++++ include/linux/stmmac.h | 1 + 2 files changed, 35 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6d4323d045732..ccf383b355e75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3102,6 +3102,36 @@ int stmmac_get_phy_intf_sel(phy_interface_t interface) } EXPORT_SYMBOL_GPL(stmmac_get_phy_intf_sel); +static int stmmac_prereset_configure(struct stmmac_priv *priv) +{ + struct plat_stmmacenet_data *plat_dat = priv->plat; + phy_interface_t interface; + int phy_intf_sel, ret; + + if (!plat_dat->set_phy_intf_sel) + return 0; + + interface = plat_dat->phy_interface; + phy_intf_sel = stmmac_get_phy_intf_sel(interface); + if (phy_intf_sel < 0) { + netdev_err(priv->dev, + "failed to get phy_intf_sel for %s: %pe\n", + phy_modes(interface), ERR_PTR(phy_intf_sel)); + return phy_intf_sel; + } + + ret = plat_dat->set_phy_intf_sel(plat_dat->bsp_priv, phy_intf_sel); + if (ret == -EINVAL) + netdev_err(priv->dev, "platform does not support %s\n", + phy_modes(interface)); + else if (ret < 0) + 
netdev_err(priv->dev, + "platform failed to set interface %s: %pe\n", + phy_modes(interface), ERR_PTR(ret)); + + return ret; +} + /** * stmmac_init_dma_engine - DMA init. * @priv: driver private structure @@ -3128,6 +3158,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE)) priv->plat->dma_cfg->atds = 1; + ret = stmmac_prereset_configure(priv); + if (ret) + return ret; + ret = stmmac_reset(priv, priv->ioaddr); if (ret) { netdev_err(priv->dev, "Failed to reset the dma\n"); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 151c81c560c8c..48e9f1d4e17e8 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -250,6 +250,7 @@ struct plat_stmmacenet_data { struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*get_interfaces)(struct stmmac_priv *priv, void *bsp_priv, unsigned long *interfaces); + int (*set_phy_intf_sel)(void *priv, u8 phy_intf_sel); int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); From 8233cc439779eac1d2682d334c1aa6bb6d95120c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:21 +0000 Subject: [PATCH 518/867] net: stmmac: imx: convert to PHY_INTF_SEL_xxx Convert dwmac-imx to use the PHY_INTF_SEL_xxx definitions rather than constants via: - ensuring that the prefix for the MASK and value definitions is the same. - using FIELD_PREP() to shift the PHY_INTF_SEL_xxx definition to the appropriate bitfield. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt4n-0000000Choy-0IeG@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 147fa08d5b6e7..4fbee59e7337c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -23,18 +23,25 @@ #include "stmmac_platform.h" #define GPR_ENET_QOS_INTF_MODE_MASK GENMASK(21, 16) -#define GPR_ENET_QOS_INTF_SEL_MII (0x0 << 16) -#define GPR_ENET_QOS_INTF_SEL_RMII (0x4 << 16) -#define GPR_ENET_QOS_INTF_SEL_RGMII (0x1 << 16) +#define GPR_ENET_QOS_INTF_SEL_MASK GENMASK(20, 16) +#define GPR_ENET_QOS_INTF_SEL_MII FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, \ + PHY_INTF_SEL_GMII_MII) +#define GPR_ENET_QOS_INTF_SEL_RMII FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, \ + PHY_INTF_SEL_RMII) +#define GPR_ENET_QOS_INTF_SEL_RGMII FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, \ + PHY_INTF_SEL_RGMII) #define GPR_ENET_QOS_CLK_GEN_EN (0x1 << 19) #define GPR_ENET_QOS_CLK_TX_CLK_SEL (0x1 << 20) #define GPR_ENET_QOS_RGMII_EN (0x1 << 21) #define MX93_GPR_ENET_QOS_INTF_MODE_MASK GENMASK(3, 0) -#define MX93_GPR_ENET_QOS_INTF_MASK GENMASK(3, 1) -#define MX93_GPR_ENET_QOS_INTF_SEL_MII (0x0 << 1) -#define MX93_GPR_ENET_QOS_INTF_SEL_RMII (0x4 << 1) -#define MX93_GPR_ENET_QOS_INTF_SEL_RGMII (0x1 << 1) +#define MX93_GPR_ENET_QOS_INTF_SEL_MASK GENMASK(3, 1) +#define MX93_GPR_ENET_QOS_INTF_SEL_MII FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, \ + PHY_INTF_SEL_GMII_MII) +#define MX93_GPR_ENET_QOS_INTF_SEL_RMII FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, \ + PHY_INTF_SEL_RMII) +#define MX93_GPR_ENET_QOS_INTF_SEL_RGMII FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, \ + PHY_INTF_SEL_RGMII) #define MX93_GPR_ENET_QOS_CLK_GEN_EN (0x1 << 0) #define MX93_GPR_ENET_QOS_CLK_SEL_MASK 
BIT_MASK(0) #define MX93_GPR_CLK_SEL_OFFSET (4) @@ -241,7 +248,7 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) if (regmap_read(dwmac->intf_regmap, dwmac->intf_reg_off, &iface)) return; - iface &= MX93_GPR_ENET_QOS_INTF_MASK; + iface &= MX93_GPR_ENET_QOS_INTF_SEL_MASK; if (iface != MX93_GPR_ENET_QOS_INTF_SEL_RGMII) return; From d73c1dccfb9909f0e2d517af887fe414ab421cea Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:26 +0000 Subject: [PATCH 519/867] net: stmmac: imx: use FIELD_PREP()/FIELD_GET() for PHY_INTF_SEL_x Use FIELD_PREP()/FIELD_GET() in the functions to construct the PHY interface selection bitfield or to extract its value. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt4s-0000000Chp4-0kwf@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 4fbee59e7337c..f1cfccd4269c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -24,24 +24,12 @@ #define GPR_ENET_QOS_INTF_MODE_MASK GENMASK(21, 16) #define GPR_ENET_QOS_INTF_SEL_MASK GENMASK(20, 16) -#define GPR_ENET_QOS_INTF_SEL_MII FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, \ - PHY_INTF_SEL_GMII_MII) -#define GPR_ENET_QOS_INTF_SEL_RMII FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, \ - PHY_INTF_SEL_RMII) -#define GPR_ENET_QOS_INTF_SEL_RGMII FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, \ - PHY_INTF_SEL_RGMII) #define GPR_ENET_QOS_CLK_GEN_EN (0x1 << 19) #define GPR_ENET_QOS_CLK_TX_CLK_SEL (0x1 << 20) #define GPR_ENET_QOS_RGMII_EN (0x1 << 21) #define MX93_GPR_ENET_QOS_INTF_MODE_MASK GENMASK(3, 0) #define MX93_GPR_ENET_QOS_INTF_SEL_MASK GENMASK(3, 1) -#define MX93_GPR_ENET_QOS_INTF_SEL_MII FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, \ - 
PHY_INTF_SEL_GMII_MII) -#define MX93_GPR_ENET_QOS_INTF_SEL_RMII FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, \ - PHY_INTF_SEL_RMII) -#define MX93_GPR_ENET_QOS_INTF_SEL_RGMII FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, \ - PHY_INTF_SEL_RGMII) #define MX93_GPR_ENET_QOS_CLK_GEN_EN (0x1 << 0) #define MX93_GPR_ENET_QOS_CLK_SEL_MASK BIT_MASK(0) #define MX93_GPR_CLK_SEL_OFFSET (4) @@ -77,22 +65,24 @@ struct imx_priv_data { static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; + u8 phy_intf_sel; int val; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - val = GPR_ENET_QOS_INTF_SEL_MII; + phy_intf_sel = PHY_INTF_SEL_GMII_MII; + val = 0; break; case PHY_INTERFACE_MODE_RMII: - val = GPR_ENET_QOS_INTF_SEL_RMII; - val |= (dwmac->rmii_refclk_ext ? 0 : GPR_ENET_QOS_CLK_TX_CLK_SEL); + phy_intf_sel = PHY_INTF_SEL_RMII; + val = dwmac->rmii_refclk_ext ? 0 : GPR_ENET_QOS_CLK_TX_CLK_SEL; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - val = GPR_ENET_QOS_INTF_SEL_RGMII | - GPR_ENET_QOS_RGMII_EN; + phy_intf_sel = PHY_INTF_SEL_RGMII; + val = GPR_ENET_QOS_RGMII_EN; break; default: pr_debug("imx dwmac doesn't support %s interface\n", @@ -100,7 +90,9 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } - val |= GPR_ENET_QOS_CLK_GEN_EN; + val |= FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, phy_intf_sel) | + GPR_ENET_QOS_CLK_GEN_EN; + return regmap_update_bits(dwmac->intf_regmap, dwmac->intf_reg_off, GPR_ENET_QOS_INTF_MODE_MASK, val); }; @@ -117,11 +109,12 @@ imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat) static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; + u8 phy_intf_sel; int val, ret; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - val = MX93_GPR_ENET_QOS_INTF_SEL_MII; + 
phy_intf_sel = PHY_INTF_SEL_GMII_MII; break; case PHY_INTERFACE_MODE_RMII: if (dwmac->rmii_refclk_ext) { @@ -132,13 +125,13 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) if (ret) return ret; } - val = MX93_GPR_ENET_QOS_INTF_SEL_RMII; + phy_intf_sel = PHY_INTF_SEL_RMII; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - val = MX93_GPR_ENET_QOS_INTF_SEL_RGMII; + phy_intf_sel = PHY_INTF_SEL_RGMII; break; default: dev_dbg(dwmac->dev, "imx dwmac doesn't support %s interface\n", @@ -146,7 +139,9 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } - val |= MX93_GPR_ENET_QOS_CLK_GEN_EN; + val = FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, phy_intf_sel) | + MX93_GPR_ENET_QOS_CLK_GEN_EN; + return regmap_update_bits(dwmac->intf_regmap, dwmac->intf_reg_off, MX93_GPR_ENET_QOS_INTF_MODE_MASK, val); }; @@ -248,8 +243,8 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) if (regmap_read(dwmac->intf_regmap, dwmac->intf_reg_off, &iface)) return; - iface &= MX93_GPR_ENET_QOS_INTF_SEL_MASK; - if (iface != MX93_GPR_ENET_QOS_INTF_SEL_RGMII) + if (FIELD_GET(MX93_GPR_ENET_QOS_INTF_SEL_MASK, iface) != + PHY_INTF_SEL_RGMII) return; old_ctrl = readl(dwmac->base_addr + MAC_CTRL_REG); @@ -262,6 +257,7 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) readl(dwmac->base_addr + MAC_CTRL_REG); usleep_range(10, 20); + iface &= MX93_GPR_ENET_QOS_INTF_SEL_MASK; iface |= MX93_GPR_ENET_QOS_CLK_GEN_EN; regmap_update_bits(dwmac->intf_regmap, dwmac->intf_reg_off, MX93_GPR_ENET_QOS_INTF_MODE_MASK, iface); From c012710c14a70dfa21691e2542d18dd4b621c518 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:31 +0000 Subject: [PATCH 520/867] net: stmmac: imx: use stmmac_get_phy_intf_sel() i.MX implementations other than IMX8DXL involve setting the dwmac core phy_intf_sel 
input. Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validating the result, and passing it into the implementation specific .set_intf_mode() method rather than each .set_intf_mode() method doing this. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt4x-0000000ChpA-1Edr@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index f1cfccd4269c3..dc28486a7af05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -45,7 +45,8 @@ struct imx_dwmac_ops { bool mac_rgmii_txclk_auto_adj; int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); - int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); + int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; @@ -62,26 +63,23 @@ struct imx_priv_data { struct plat_stmmacenet_data *plat_dat; }; -static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) +static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; - u8 phy_intf_sel; int val; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; val = 0; break; case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = PHY_INTF_SEL_RMII; val = dwmac->rmii_refclk_ext ? 
0 : GPR_ENET_QOS_CLK_TX_CLK_SEL; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - phy_intf_sel = PHY_INTF_SEL_RGMII; val = GPR_ENET_QOS_RGMII_EN; break; default: @@ -98,7 +96,8 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) }; static int -imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat) +imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { int ret = 0; @@ -106,16 +105,13 @@ imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat) return ret; } -static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) +static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; - u8 phy_intf_sel; int val, ret; switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_MII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; - break; case PHY_INTERFACE_MODE_RMII: if (dwmac->rmii_refclk_ext) { ret = regmap_clear_bits(dwmac->intf_regmap, @@ -125,13 +121,12 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) if (ret) return ret; } - phy_intf_sel = PHY_INTF_SEL_RMII; break; + case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - phy_intf_sel = PHY_INTF_SEL_RGMII; break; default: dev_dbg(dwmac->dev, "imx dwmac doesn't support %s interface\n", @@ -176,12 +171,24 @@ static int imx_dwmac_init(struct platform_device *pdev, void *priv) { struct plat_stmmacenet_data *plat_dat; struct imx_priv_data *dwmac = priv; - int ret; - - plat_dat = dwmac->plat_dat; + phy_interface_t interface; + int phy_intf_sel, ret; if (dwmac->ops->set_intf_mode) { - ret = dwmac->ops->set_intf_mode(plat_dat); + plat_dat = dwmac->plat_dat; + interface = plat_dat->phy_interface; + + phy_intf_sel = stmmac_get_phy_intf_sel(interface); + if 
(phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) { + dev_dbg(dwmac->dev, + "imx dwmac doesn't support %s interface\n", + phy_modes(interface)); + return phy_intf_sel < 0 ? phy_intf_sel : -EINVAL; + } + + ret = dwmac->ops->set_intf_mode(plat_dat, phy_intf_sel); if (ret) return ret; } From 35103babce3036058cd9ed8674c98e9ab397d715 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:36 +0000 Subject: [PATCH 521/867] net: stmmac: imx: simplify set_intf_mode() implementations Simplify the set_intf_mode() implementations, testing the phy_intf_sel value rather than the PHY interface mode. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt52-0000000ChpG-1bsd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 67 ++++++------------- 1 file changed, 19 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index dc28486a7af05..d69be9de44682 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -67,29 +67,15 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat, u8 phy_intf_sel) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; - int val; - - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_MII: - val = 0; - break; - case PHY_INTERFACE_MODE_RMII: - val = dwmac->rmii_refclk_ext ? 
0 : GPR_ENET_QOS_CLK_TX_CLK_SEL; - break; - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - val = GPR_ENET_QOS_RGMII_EN; - break; - default: - pr_debug("imx dwmac doesn't support %s interface\n", - phy_modes(plat_dat->phy_interface)); - return -EINVAL; - } + unsigned int val; - val |= FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, phy_intf_sel) | - GPR_ENET_QOS_CLK_GEN_EN; + val = FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, phy_intf_sel) | + GPR_ENET_QOS_CLK_GEN_EN; + + if (phy_intf_sel == PHY_INTF_SEL_RMII && !dwmac->rmii_refclk_ext) + val |= GPR_ENET_QOS_CLK_TX_CLK_SEL; + else if (phy_intf_sel == PHY_INTF_SEL_RGMII) + val |= GPR_ENET_QOS_RGMII_EN; return regmap_update_bits(dwmac->intf_regmap, dwmac->intf_reg_off, GPR_ENET_QOS_INTF_MODE_MASK, val); @@ -99,39 +85,24 @@ static int imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat, u8 phy_intf_sel) { - int ret = 0; - /* TBD: depends on imx8dxl scu interfaces to be upstreamed */ - return ret; + return 0; } static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat, u8 phy_intf_sel) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; - int val, ret; - - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RMII: - if (dwmac->rmii_refclk_ext) { - ret = regmap_clear_bits(dwmac->intf_regmap, - dwmac->intf_reg_off + - MX93_GPR_CLK_SEL_OFFSET, - MX93_GPR_ENET_QOS_CLK_SEL_MASK); - if (ret) - return ret; - } - break; - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - break; - default: - dev_dbg(dwmac->dev, "imx dwmac doesn't support %s interface\n", - phy_modes(plat_dat->phy_interface)); - return -EINVAL; + unsigned int val; + int ret; + + if (phy_intf_sel == PHY_INTF_SEL_RMII && dwmac->rmii_refclk_ext) { + ret = regmap_clear_bits(dwmac->intf_regmap, + dwmac->intf_reg_off + + 
MX93_GPR_CLK_SEL_OFFSET, + MX93_GPR_ENET_QOS_CLK_SEL_MASK); + if (ret) + return ret; } val = FIELD_PREP(MX93_GPR_ENET_QOS_INTF_SEL_MASK, phy_intf_sel) | From 38cd4e84b369c11680966fdea129e11dbb28a6ec Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:41 +0000 Subject: [PATCH 522/867] net: stmmac: imx: cleanup arguments for set_intf_mode() method Pass the imx_priv_data instead of the plat_stmmacenet_data into the set_intf_mode() SoC specific methods. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt57-0000000ChpL-25kS@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index d69be9de44682..ae1b73e1bcb2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -39,14 +39,15 @@ #define RMII_RESET_SPEED (0x3 << 14) #define CTRL_SPEED_MASK GENMASK(15, 14) +struct imx_priv_data; + struct imx_dwmac_ops { u32 addr_width; u32 flags; bool mac_rgmii_txclk_auto_adj; int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); - int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel); + int (*set_intf_mode)(struct imx_priv_data *dwmac, u8 phy_intf_sel); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; @@ -63,10 +64,8 @@ struct imx_priv_data { struct plat_stmmacenet_data *plat_dat; }; -static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int imx8mp_set_intf_mode(struct imx_priv_data *dwmac, u8 phy_intf_sel) { - struct imx_priv_data *dwmac = plat_dat->bsp_priv; unsigned int val; val = FIELD_PREP(GPR_ENET_QOS_INTF_SEL_MASK, phy_intf_sel) | @@ -82,17 +81,14 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat, }; static int 
-imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +imx8dxl_set_intf_mode(struct imx_priv_data *dwmac, u8 phy_intf_sel) { /* TBD: depends on imx8dxl scu interfaces to be upstreamed */ return 0; } -static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int imx93_set_intf_mode(struct imx_priv_data *dwmac, u8 phy_intf_sel) { - struct imx_priv_data *dwmac = plat_dat->bsp_priv; unsigned int val; int ret; @@ -140,14 +136,12 @@ static int imx_dwmac_clks_config(void *priv, bool enabled) static int imx_dwmac_init(struct platform_device *pdev, void *priv) { - struct plat_stmmacenet_data *plat_dat; struct imx_priv_data *dwmac = priv; phy_interface_t interface; int phy_intf_sel, ret; if (dwmac->ops->set_intf_mode) { - plat_dat = dwmac->plat_dat; - interface = plat_dat->phy_interface; + interface = dwmac->plat_dat->phy_interface; phy_intf_sel = stmmac_get_phy_intf_sel(interface); if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && @@ -159,7 +153,7 @@ static int imx_dwmac_init(struct platform_device *pdev, void *priv) return phy_intf_sel < 0 ? phy_intf_sel : -EINVAL; } - ret = dwmac->ops->set_intf_mode(plat_dat, phy_intf_sel); + ret = dwmac->ops->set_intf_mode(dwmac, phy_intf_sel); if (ret) return ret; } From eaca1a4dc51e5e4979e45a4ad72a1c2a88a80a72 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Nov 2025 11:50:46 +0000 Subject: [PATCH 523/867] net: stmmac: imx: use ->set_phy_intf_sel() Rather than placing the phy_intf_sel() setup in the ->init() method, move it to the new ->set_phy_intf_sel() method. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vFt5C-0000000ChpR-2kAB@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 38 +++++-------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index ae1b73e1bcb2f..db288fbd5a4df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -134,36 +134,19 @@ static int imx_dwmac_clks_config(void *priv, bool enabled) return ret; } -static int imx_dwmac_init(struct platform_device *pdev, void *priv) +static int imx_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) { - struct imx_priv_data *dwmac = priv; - phy_interface_t interface; - int phy_intf_sel, ret; - - if (dwmac->ops->set_intf_mode) { - interface = dwmac->plat_dat->phy_interface; - - phy_intf_sel = stmmac_get_phy_intf_sel(interface); - if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && - phy_intf_sel != PHY_INTF_SEL_RGMII && - phy_intf_sel != PHY_INTF_SEL_RMII) { - dev_dbg(dwmac->dev, - "imx dwmac doesn't support %s interface\n", - phy_modes(interface)); - return phy_intf_sel < 0 ? 
phy_intf_sel : -EINVAL; - } + struct imx_priv_data *dwmac = bsp_priv; - ret = dwmac->ops->set_intf_mode(dwmac, phy_intf_sel); - if (ret) - return ret; - } + if (!dwmac->ops->set_intf_mode) + return 0; - return 0; -} + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) + return -EINVAL; -static void imx_dwmac_exit(struct platform_device *pdev, void *priv) -{ - /* nothing to do now */ + return dwmac->ops->set_intf_mode(dwmac, phy_intf_sel); } static int imx_dwmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, @@ -342,8 +325,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) plat_dat->tx_queues_cfg[i].tbs_en = 1; plat_dat->host_dma_width = dwmac->ops->addr_width; - plat_dat->init = imx_dwmac_init; - plat_dat->exit = imx_dwmac_exit; + plat_dat->set_phy_intf_sel = imx_set_phy_intf_sel; plat_dat->clks_config = imx_dwmac_clks_config; plat_dat->bsp_priv = dwmac; dwmac->plat_dat = plat_dat; From f4b2786fb14bef2b16c66be076e41863da1e511b Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Mon, 3 Nov 2025 15:43:05 +0800 Subject: [PATCH 524/867] virtio_net: Fix a typo error in virtio_net Fix the spelling error of "separate". Signed-off-by: Chu Guangqing Reviewed-by: Jacob Keller Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20251103074305.4727-1-chuguangqing@inspur.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8e8a179aaa491..1e6f5e650f115 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3760,7 +3760,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) * (2) no user configuration. * * During rss command processing, device updates queue_pairs using rss.max_tx_vq. 
That is, - * the device updates queue_pairs together with rss, so we can skip the sperate queue_pairs + * the device updates queue_pairs together with rss, so we can skip the separate queue_pairs * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. */ if (vi->has_rss && !netif_is_rxfh_configured(dev)) { From 2428803d5eef1fd8451a6c4ba41d17cd199f3715 Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Mon, 3 Nov 2025 14:05:04 +0800 Subject: [PATCH 525/867] gtp: Fix a typo error for size Fix the spelling error of "size". Signed-off-by: Chu Guangqing Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251103060504.3524-1-chuguangqing@inspur.com Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5cb59d72bc820..4213c3b2d532b 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -633,7 +633,7 @@ static void gtp1u_build_echo_msg(struct gtp1_header_long *hdr, __u8 msg_type) hdr->tid = 0; /* seq, npdu and next should be counted to the length of the GTP packet - * that's why szie of gtp1_header should be subtracted, + * that's why size of gtp1_header should be subtracted, * not size of gtp1_header_long. 
*/ From 9781642e58903e52f3c607e957e5220e95e792b6 Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Mon, 3 Nov 2025 13:53:51 +0800 Subject: [PATCH 526/867] veth: Fix a typo error in veth Fix a spelling error for resources Signed-off-by: Chu Guangqing Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251103055351.3150-1-chuguangqing@inspur.com Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index a3046142cb8e2..87a63c4bee777 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1323,7 +1323,7 @@ static int veth_set_channels(struct net_device *dev, if (peer) netif_carrier_off(peer); - /* try to allocate new resurces, as needed*/ + /* try to allocate new resources, as needed*/ err = veth_enable_range_safe(dev, old_rx_count, new_rx_count); if (err) goto out; From 96c68954cd3b65f321d40b9078cdb49a26bdaf9a Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Mon, 3 Nov 2025 13:44:43 +0800 Subject: [PATCH 527/867] net: sungem_phy: Fix a typo error in sungem_phy Fix a spelling mistake for regularly Signed-off-by: Chu Guangqing Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251103054443.2878-1-chuguangqing@inspur.com Signed-off-by: Jakub Kicinski --- drivers/net/sungem_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index 55aa8d0c8e1f2..c10198d44576e 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -1165,7 +1165,7 @@ int sungem_phy_probe(struct mii_phy *phy, int mii_id) int i; /* We do not reset the mii_phy structure as the driver - * may re-probe the PHY regulary + * may re-probe the PHY regularly */ phy->mii_id = mii_id; From 52665fcc2241f8f9a17543d9a6531b1a1b029bde Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Mon, 3 Nov 2025 11:22:12 +0800 Subject: [PATCH 528/867] xen/netfront: Comment Correction: Fix Spelling Error and Description
of Queue Quantity Rules The original comments contained spelling errors and incomplete logical descriptions, which could easily lead to misunderstandings of the code logic. The specific modifications are as follows: Correct the spelling error by changing "inut max" to "but not exceed the maximum limit"; Add the note "If the user has not specified a value, the default maximum limit is 8" to clarify the default value logic; Improve the coherence of the statement to make the queue quantity rules clearer. After the modification, the comments can accurately reflect the code behavior of "taking the smaller value between the number of CPUs and the default maximum limit of 8 for the number of queues", enhancing code maintainability. Signed-off-by: Chu Guangqing Reviewed-by: Juergen Gross Link: https://patch.msgid.link/20251103032212.2462-1-chuguangqing@inspur.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netfront.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a11a0e9494005..7c2220366623e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2696,8 +2696,9 @@ static int __init netif_init(void) pr_info("Initialising Xen virtual ethernet driver\n"); - /* Allow as many queues as there are CPUs inut max. 8 if user has not - * specified a value. + /* Allow the number of queues to match the number of CPUs, but not exceed + * the maximum limit. If the user has not specified a value, the default + * maximum limit is 8. 
*/ if (xennet_max_queues == 0) xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT, From 091400a5d411ee7398095ba832361eb12b345f3d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:33 +0200 Subject: [PATCH 529/867] net/mlx5e: Enhance function structures for self loopback prevention application The re-application of self loopback prevention attributes in TIRs is necessary in old firmwares (where tis_tir_td_order cap is cleared) after recreation of SQs. However, this is not needed in new firmware with tis_tir_td_order=1. As a preparation patch, enhance the function structures to differentiate between an explicit loopback prevention configuration apply, and the re-apply operation required by old firmware. Loopback selftests should now call mlx5e_modify_tirs_lb() directly, as their use case is not related to the firmware limitation. Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++- .../net/ethernet/mellanox/mlx5/core/en_common.c | 16 ++++++++++++---- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../ethernet/mellanox/mlx5/core/en_selftest.c | 4 ++-- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index fd107906bc28b..4a29333285c0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1156,7 +1156,9 @@ extern const struct ethtool_ops mlx5e_ethtool_ops; int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); -int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool 
enable_uc_lb, +int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb, + bool enable_mc_lb); +int mlx5e_refresh_tirs(struct mlx5_core_dev *mdev, bool enable_uc_lb, bool enable_mc_lb); void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 30424ccad584f..376a018b2db12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -247,10 +247,9 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) memset(res, 0, sizeof(*res)); } -int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, - bool enable_mc_lb) +int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb, + bool enable_mc_lb) { - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; u8 lb_flags = 0; int err = 0; @@ -285,7 +284,16 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, kvfree(in); if (err) - netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mlx5_core_err(mdev, + "modify tir(0x%x) enable_lb uc(%d) mc(%d) failed, %d\n", + tirn, + enable_uc_lb, enable_mc_lb, err); return err; } + +int mlx5e_refresh_tirs(struct mlx5_core_dev *mdev, bool enable_uc_lb, + bool enable_mc_lb) +{ + return mlx5e_modify_tirs_lb(mdev, enable_uc_lb, enable_mc_lb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6b905848fe86e..f0d7a61e014b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6136,7 +6136,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) static int mlx5e_update_nic_rx(struct mlx5e_priv *priv) { - return mlx5e_refresh_tirs(priv, false, false); + return mlx5e_refresh_tirs(priv->mdev, false, false); } static const struct mlx5e_profile mlx5e_nic_profile = 
{ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 2f7a543feca62..fcad464bc4d58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -214,7 +214,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, return err; } - err = mlx5e_refresh_tirs(priv, true, false); + err = mlx5e_modify_tirs_lb(priv->mdev, true, false); if (err) goto out; @@ -243,7 +243,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, mlx5_nic_vport_update_local_lb(priv->mdev, false); dev_remove_pack(&lbtp->pt); - mlx5e_refresh_tirs(priv, false, false); + mlx5e_modify_tirs_lb(priv->mdev, false, false); } static int mlx5e_cond_loopback(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 906b1fbc27aa0..976347ac1faf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -334,7 +334,7 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn) int mlx5i_update_nic_rx(struct mlx5e_priv *priv) { - return mlx5e_refresh_tirs(priv, true, true); + return mlx5e_refresh_tirs(priv->mdev, true, true); } int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) From 5c51a86122b20326229c6c9dff4a92c186cbb6bf Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:34 +0200 Subject: [PATCH 530/867] net/mlx5e: Use TIR API in mlx5e_modify_tirs_lb() Extend the TIR API and use it in mlx5e_modify_tirs_lb() instead of the explicit modify_tir code. 
Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Dragos Tatulea Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/tir.c | 29 +++++++++++-- .../net/ethernet/mellanox/mlx5/core/en/tir.h | 3 ++ .../ethernet/mellanox/mlx5/core/en_common.c | 41 +++++++------------ 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index 19499072f67f6..0b55e77f19c8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -146,6 +146,31 @@ void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder) MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } +static void mlx5e_tir_context_self_lb_block(void *tirc, bool enable_uc_lb, + bool enable_mc_lb) +{ + u8 lb_flags = 0; + + if (enable_uc_lb) + lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + if (enable_mc_lb) + lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + + MLX5_SET(tirc, tirc, self_lb_block, lb_flags); +} + +void mlx5e_tir_builder_build_self_lb_block(struct mlx5e_tir_builder *builder, + bool enable_uc_lb, + bool enable_mc_lb) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.self_lb_en, 1); + + mlx5e_tir_context_self_lb_block(tirc, enable_uc_lb, enable_mc_lb); +} + void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) { void *tirc = mlx5e_tir_builder_get_tirc(builder); @@ -153,9 +178,7 @@ void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) WARN_ON(builder->modify); MLX5_SET(tirc, tirc, tls_en, 1); - MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); + mlx5e_tir_context_self_lb_block(tirc, true, true); } 
int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h index e8df3aaf6562f..958eeb959a192 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -35,6 +35,9 @@ void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, const struct mlx5e_rss_params_traffic_type *rss_tt, bool inner); void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_build_self_lb_block(struct mlx5e_tir_builder *builder, + bool enable_uc_lb, + bool enable_mc_lb); void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder); struct mlx5_core_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 376a018b2db12..022a0cf7063cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -250,44 +250,31 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb, bool enable_mc_lb) { + struct mlx5e_tir_builder *builder; struct mlx5e_tir *tir; - u8 lb_flags = 0; - int err = 0; - u32 tirn = 0; - int inlen; - void *in; + int err = 0; - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(true); + if (!builder) return -ENOMEM; - if (enable_uc_lb) - lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; - - if (enable_mc_lb) - lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - - if (lb_flags) - MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags); - - MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + mlx5e_tir_builder_build_self_lb_block(builder, enable_uc_lb, + enable_mc_lb); mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock); list_for_each_entry(tir, 
&mdev->mlx5e_res.hw_objs.td.tirs_list, list) { - tirn = tir->tirn; - err = mlx5_core_modify_tir(mdev, tirn, in); - if (err) + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5_core_err(mdev, + "modify tir(0x%x) enable_lb uc(%d) mc(%d) failed, %d\n", + mlx5e_tir_get_tirn(tir), + enable_uc_lb, enable_mc_lb, err); break; + } } mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); - kvfree(in); - if (err) - mlx5_core_err(mdev, - "modify tir(0x%x) enable_lb uc(%d) mc(%d) failed, %d\n", - tirn, - enable_uc_lb, enable_mc_lb, err); + mlx5e_tir_builder_free(builder); return err; } From 99b002018f6a3dc08c789e2962070d6de7cb3bac Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:35 +0200 Subject: [PATCH 531/867] net/mlx5e: Allow setting self loopback prevention bits on TIR init Until now, IPoIB was creating TIRs without setting self loopback prevention, then modifying them in activation stage. This is a preparation patch, that will be used by IPoIB to init TIRs properly without the need for following calls of modify_tir. 
Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/rss.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en/rss.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c | 7 +++++++ drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h | 1 + 4 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index c96cbc4b0dbfa..88b0e1050d1af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -231,6 +231,8 @@ mlx5e_rss_create_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, rqtn, rss_inner); mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); rss_tt = mlx5e_rss_get_tt_config(rss, tt); + mlx5e_tir_builder_build_self_lb_block(builder, rss->params.self_lb_blk, + rss->params.self_lb_blk); mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); err = mlx5e_tir_init(tir, builder, rss->mdev, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index 5fb03cd0a411b..17664757a561c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -23,6 +23,7 @@ struct mlx5e_rss_init_params { struct mlx5e_rss_params { bool inner_ft_support; u32 drop_rqn; + bool self_lb_blk; }; struct mlx5e_rss_params_traffic_type diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index ac26a32845d0d..55c117b7d8c4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -71,6 +71,8 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, rss_params = (struct 
mlx5e_rss_params) { .inner_ft_support = inner_ft_support, .drop_rqn = res->drop_rqn, + .self_lb_blk = + res->features & MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK, }; rss = mlx5e_rss_init(res->mdev, &rss_params, &init_params); @@ -104,6 +106,8 @@ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int in rss_params = (struct mlx5e_rss_params) { .inner_ft_support = inner_ft_support, .drop_rqn = res->drop_rqn, + .self_lb_blk = + res->features & MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK, }; rss = mlx5e_rss_init(res->mdev, &rss_params, &init_params); @@ -346,6 +350,7 @@ static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsig static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + bool self_lb_blk = res->features & MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK; struct mlx5e_tir_builder *builder; int err = 0; int ix; @@ -376,6 +381,8 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), inner_ft_support); mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + mlx5e_tir_builder_build_self_lb_block(builder, self_lb_blk, + self_lb_blk); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 65a857c215e1a..675780120a20d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -21,6 +21,7 @@ enum mlx5e_rx_res_features { MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), MLX5E_RX_RES_FEATURE_PTP = BIT(1), MLX5E_RX_RES_FEATURE_MULTI_VHCA = BIT(2), + MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK = BIT(3), }; /* Setup */ From a4c81e72f132b93a3b920196621a7b78c71fb7fc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:36 +0200 Subject: [PATCH 
532/867] net/mlx5: IPoIB, set self loopback prevention in TIR init In IPoIB, the self loopback prevention configuration apply in activation stage has two roles: fulfill a firmware requirement for old firmware (tis_tir_td_order=0), and update the proper configuration as it was not set in init. Here we set the proper configuration in init, to allow skipping the modify_tirs commands on new firmware in a downstream patch. Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-5-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 976347ac1faf5..0a6003fe60e9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -427,6 +427,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + enum mlx5e_rx_res_features features; int err; priv->fs = mlx5e_fs_init(priv->profile, mdev, @@ -445,7 +446,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - priv->rx_res = mlx5e_rx_res_create(priv->mdev, 0, priv->max_nch, priv->drop_rq.rqn, + features = MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK; + priv->rx_res = mlx5e_rx_res_create(priv->mdev, features, priv->max_nch, + priv->drop_rq.rqn, &priv->channels.params.packet_merge, priv->channels.params.num_channels); if (IS_ERR(priv->rx_res)) { From 477c352adda4ba0bd80c945ab13165161802239e Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:37 +0200 Subject: [PATCH 533/867] net/mlx5e: Do not re-apply TIR loopback configuration if not necessary On old firmware, (tis_tir_td_order=0), TIR of a 
transport domain should either be created after all SQs of the same domain, or TIR.self_lb_en should be reapplied using MODIFY_TIR, for self loopback filtering to function correctly. This is not necessary anymore on new FW (tis_tir_td_order=1), thus there's no need for calling modify_tir operations after creating a new set of SQs to maintain the self loopback prevention functional. Skip these operations. This saves O(max_num_channels) MODIFY_TIR firmware commands in operations like interface up or channels configuration change. Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-6-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 022a0cf7063cd..5a2ac7b6f2607 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -282,5 +282,8 @@ int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb, int mlx5e_refresh_tirs(struct mlx5_core_dev *mdev, bool enable_uc_lb, bool enable_mc_lb) { + if (MLX5_CAP_GEN(mdev, tis_tir_td_order)) + return 0; /* refresh not needed */ + return mlx5e_modify_tirs_lb(mdev, enable_uc_lb, enable_mc_lb); } From 911e3a37b024163d8329e3560d6fd5f0f0da2558 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:38 +0200 Subject: [PATCH 534/867] net/mlx5e: Pass old channels as argument to mlx5e_switch_priv_channels Let the caller function mlx5e_safe_switch_params() maintain a copy of the old channels, and pass it to mlx5e_switch_priv_channels(). This is in preparation for the next patch.
Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-7-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f0d7a61e014b0..4adc5a4297454 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3356,12 +3356,12 @@ static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, } static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *old_chs, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, void *context) { struct net_device *netdev = priv->netdev; - struct mlx5e_channels old_chs; int carrier_ok; int err = 0; @@ -3370,7 +3370,6 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, mlx5e_deactivate_priv_channels(priv); - old_chs = priv->channels; priv->channels = *new_chs; /* New channels are ready to roll, call the preactivate hook if needed @@ -3379,12 +3378,12 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, if (preactivate) { err = preactivate(priv, context); if (err) { - priv->channels = old_chs; + priv->channels = *old_chs; goto out; } } - mlx5e_close_channels(&old_chs); + mlx5e_close_channels(old_chs); priv->profile->update_rx(priv); mlx5e_selq_apply(&priv->selq); @@ -3403,16 +3402,20 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, mlx5e_fp_preactivate preactivate, void *context, bool reset) { - struct mlx5e_channels *new_chs; + struct mlx5e_channels *old_chs, *new_chs; int err; reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); if (!reset) return mlx5e_switch_priv_params(priv, params, preactivate, context); + old_chs = kzalloc(sizeof(*old_chs), GFP_KERNEL); new_chs = 
kzalloc(sizeof(*new_chs), GFP_KERNEL); - if (!new_chs) - return -ENOMEM; + if (!old_chs || !new_chs) { + err = -ENOMEM; + goto err_free_chs; + } + new_chs->params = *params; mlx5e_selq_prepare_params(&priv->selq, &new_chs->params); @@ -3421,11 +3424,15 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, if (err) goto err_cancel_selq; - err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); + *old_chs = priv->channels; + + err = mlx5e_switch_priv_channels(priv, old_chs, new_chs, + preactivate, context); if (err) goto err_close; kfree(new_chs); + kfree(old_chs); return 0; err_close: @@ -3433,7 +3440,9 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, err_cancel_selq: mlx5e_selq_cancel(&priv->selq); +err_free_chs: kfree(new_chs); + kfree(old_chs); return err; } From 3b88a535a8e10d83335f04c60aafbdfd37146a01 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 30 Oct 2025 15:32:39 +0200 Subject: [PATCH 535/867] net/mlx5e: Defer channels closure to reduce interface down time Cap bit tis_tir_td_order=1 indicates that an old firmware requirement / limitation no longer exists. When unset, the latency of several firmware commands significantly increases with the presence of high number of co-existing channels (both old and new sets). Hence, we used to close unneeded old channels before invoking those firmware commands. Today, on capable devices, this is no longer the case. Minimize the interface down time by deferring the old channels closure, after the activation of the new ones. Perf numbers: Measured the number of dropped packets in a simple ping flood test, during a configuration change operation, that switches the number of channels from 247 to 248. Before: 71 packets lost After: 15 packets lost, ~80% saving. 
Signed-off-by: Tariq Toukan Reviewed-by: Carolina Jubran Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761831159-1013140-8-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4adc5a4297454..7b7a0060979d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3383,7 +3383,8 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, } } - mlx5e_close_channels(old_chs); + if (!MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) + mlx5e_close_channels(old_chs); priv->profile->update_rx(priv); mlx5e_selq_apply(&priv->selq); @@ -3431,6 +3432,9 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, if (err) goto err_close; + if (MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) + mlx5e_close_channels(old_chs); + kfree(new_chs); kfree(old_chs); return 0; From c3838262b824c71c145cd3668722e99a69bc9cd9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 31 Oct 2025 14:05:51 +0800 Subject: [PATCH 536/867] virtio_net: fix alignment for virtio_net_hdr_v1_hash Changing alignment of header would mean it's no longer safe to cast a 2 byte aligned pointer between formats. Use two 16 bit fields to make it 2 byte aligned as previously. This fixes the performance regression since commit ("virtio_net: enable gso over UDP tunnel support.") as it uses virtio_net_hdr_v1_hash_tunnel which embeds virtio_net_hdr_v1_hash. Pktgen in guest + XDP_DROP on TAP + vhost_net shows the TX PPS is recovered from 2.4Mpps to 4.45Mpps. Fixes: 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.") Cc: stable@vger.kernel.org Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang Tested-by: Lei Yang Link: https://patch.msgid.link/20251031060551.126-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 15 +++++++++++++-- include/linux/virtio_net.h | 3 ++- include/uapi/linux/virtio_net.h | 3 ++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8e8a179aaa491..e6e650bc3bc32 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2539,6 +2539,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, return NULL; } +static inline u32 +virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) +{ + return __le16_to_cpu(hdr_hash->hash_value_lo) | + (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); +} + static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, struct sk_buff *skb) { @@ -2565,7 +2572,7 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, default: rss_hash_type = PKT_HASH_TYPE_NONE; } - skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); + skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); } static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, @@ -3311,6 +3318,10 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); + /* Make sure it's safe to cast between formats */ + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); + can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; @@ -6750,7 +6761,7 @@ static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, hash_report = VIRTIO_NET_HASH_REPORT_NONE; *rss_type = virtnet_xdp_rss_type[hash_report]; - *hash = __le32_to_cpu(hdr_hash->hash_value); + 
*hash = virtio_net_hash_value(hdr_hash); return 0; } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 4d1780848d0e0..b673c31569f32 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,7 +401,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; - vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_value_lo = 0; + vhdr->hash_hdr.hash_value_hi = 0; vhdr->hash_hdr.hash_report = 0; vhdr->hash_hdr.padding = 0; diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 8bf27ab8bcb4d..1db45b01532b5 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -193,7 +193,8 @@ struct virtio_net_hdr_v1 { struct virtio_net_hdr_v1_hash { struct virtio_net_hdr_v1 hdr; - __le32 hash_value; + __le16 hash_value_lo; + __le16 hash_value_hi; #define VIRTIO_NET_HASH_REPORT_NONE 0 #define VIRTIO_NET_HASH_REPORT_IPv4 1 #define VIRTIO_NET_HASH_REPORT_TCPv4 2 From f88191c7f3618405f1fc5c331a94ebfe601c5b08 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 1 Nov 2025 18:56:51 +0100 Subject: [PATCH 537/867] mptcp: pm: in-kernel: record fullmesh endp nb Instead of iterating over all endpoints, under RCU read lock, just to check if one of them has the fullmesh flag, we can keep a counter of fullmesh endpoints, similar to what is done with the other flags. This counter is now checked, before iterating over all endpoints. Similar to the other counters, this new one is also exposed. A userspace app can then know when it is being used in a fullmesh mode, with potentially (too) many subflows.
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-1-b4166772d6bb@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 3 ++- net/mptcp/pm_kernel.c | 38 +++++++++++++++++++++++++++++++++++--- net/mptcp/protocol.h | 1 + net/mptcp/sockopt.c | 2 ++ 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 87cfab874e241..04eea6d1d0a9b 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -70,7 +70,8 @@ struct mptcp_info { __u64 mptcpi_bytes_acked; __u8 mptcpi_subflows_total; __u8 mptcpi_endp_laminar_max; - __u8 reserved[2]; + __u8 mptcpi_endp_fullmesh_max; + __u8 reserved; __u32 mptcpi_last_data_sent; __u32 mptcpi_last_data_recv; __u32 mptcpi_last_ack_recv; diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 2ae95476dba35..e2918c68ff023 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -22,6 +22,7 @@ struct pm_nl_pernet { u8 endp_signal_max; u8 endp_subflow_max; u8 endp_laminar_max; + u8 endp_fullmesh_max; u8 limit_add_addr_accepted; u8 limit_extra_subflows; u8 next_id; @@ -70,6 +71,14 @@ u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk) } EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max); +u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->endp_fullmesh_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_fullmesh_max); + u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); @@ -603,9 +612,12 @@ fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, int i; /* If there is at least one MPTCP endpoint with a fullmesh flag */ - i = fill_local_addresses_vec_fullmesh(msk, remote, locals, c_flag_case); - if (i) - return i; + if 
(mptcp_pm_get_endp_fullmesh_max(msk)) { + i = fill_local_addresses_vec_fullmesh(msk, remote, locals, + c_flag_case); + if (i) + return i; + } /* If there is at least one MPTCP endpoint with a laminar flag */ if (mptcp_pm_get_endp_laminar_max(msk)) @@ -790,6 +802,10 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, addr_max = pernet->endp_laminar_max; WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1); } + if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + addr_max = pernet->endp_fullmesh_max; + WRITE_ONCE(pernet->endp_fullmesh_max, addr_max + 1); + } pernet->endpoints++; if (!entry->addr.port) @@ -1187,6 +1203,10 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) addr_max = pernet->endp_laminar_max; WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1); } + if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + addr_max = pernet->endp_fullmesh_max; + WRITE_ONCE(pernet->endp_fullmesh_max, addr_max - 1); + } pernet->endpoints--; list_del_rcu(&entry->list); @@ -1502,6 +1522,18 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, changed = (local->flags ^ entry->flags) & mask; entry->flags = (entry->flags & ~mask) | (local->flags & mask); *local = *entry; + + if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) { + u8 addr_max = pernet->endp_fullmesh_max; + + if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) + addr_max++; + else + addr_max--; + + WRITE_ONCE(pernet->endp_fullmesh_max, addr_max); + } + spin_unlock_bh(&pernet->lock); mptcp_pm_nl_set_flags_all(net, local, changed); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 379a88e14e8d2..9a34291757585 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1183,6 +1183,7 @@ void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk); +u8 
mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk); u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index a28a483858852..de90a2897d2d8 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -982,6 +982,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) mptcp_pm_get_endp_subflow_max(msk); info->mptcpi_endp_laminar_max = mptcp_pm_get_endp_laminar_max(msk); + info->mptcpi_endp_fullmesh_max = + mptcp_pm_get_endp_fullmesh_max(msk); } if (__mptcp_check_fallback(msk)) From e461e8a799a2984e9b55f40c65d123a114496dff Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 1 Nov 2025 18:56:52 +0100 Subject: [PATCH 538/867] mptcp: pm: in kernel: only use fullmesh endp if any Our documentation is saying that the in-kernel PM is only using fullmesh endpoints to establish subflows to announced addresses when at least one endpoint has a fullmesh flag. But this was not totally correct: only fullmesh endpoints were used if at least one endpoint *from the same address family as the received ADD_ADDR* has the fullmesh flag. This is confusing, and it seems clearer not to have differences depending on the address family. So, now, when at least one MPTCP endpoint has a fullmesh flag, the local addresses are picked from all fullmesh endpoints, which might be 0 if there are no endpoints for the correct address family. One selftest needs to be adapted for this behaviour change. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-2-b4166772d6bb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 10 +++------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 6 +++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index e2918c68ff023..e50721c670d00 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -609,15 +609,11 @@ fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, struct mptcp_pm_local *locals) { bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk); - int i; /* If there is at least one MPTCP endpoint with a fullmesh flag */ - if (mptcp_pm_get_endp_fullmesh_max(msk)) { - i = fill_local_addresses_vec_fullmesh(msk, remote, locals, - c_flag_case); - if (i) - return i; - } + if (mptcp_pm_get_endp_fullmesh_max(msk)) + return fill_local_addresses_vec_fullmesh(msk, remote, locals, + c_flag_case); /* If there is at least one MPTCP endpoint with a laminar flag */ if (mptcp_pm_get_endp_laminar_max(msk)) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 78a1aa4ecff2b..e7a498dd5a468 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2952,7 +2952,11 @@ mixed_tests() pm_nl_add_endpoint $ns1 10.0.1.1 flags signal speed=slow \ run_tests $ns1 $ns2 dead:beef:2::1 - chk_join_nr 1 1 1 + if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then + chk_join_nr 0 0 0 + else + chk_join_nr 1 1 1 + fi fi # fullmesh still tries to create all the possibly subflows with From 4a6220a453c8afd46a01f0bbe56bc6734adb77b7 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 1 Nov 2025 18:56:53 +0100 Subject: [PATCH 539/867] selftests: mptcp: join: do_transfer: reduce code dup The same extra long 
commands are present twice, with small differences: the variable for the stdin file is different. Use new dedicated variables in one command to avoid this code duplication. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-3-b4166772d6bb@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e7a498dd5a468..4c9ee094381e5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -951,6 +951,8 @@ do_transfer() local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + local listener_in="${sin}" + local connector_in="${cin}" port=$(get_port) :> "$cout" @@ -999,16 +1001,12 @@ do_transfer() extra_srv_args="$extra_args $extra_srv_args" if [ "$test_linkfail" -gt 1 ];then - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sinfail" > "$sout" & - else - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sin" > "$sout" & + listener_in="${sinfail}" fi + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ + ${extra_srv_args} "::" < "${listener_in}" > "${sout}" & local spid=$! 
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" @@ -1020,6 +1018,7 @@ do_transfer() ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_cl_args $connect_addr < "$cin" > "$cout" & elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + connector_in="${cinsent}" ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ timeout ${timeout_test} \ @@ -1027,6 +1026,7 @@ do_transfer() ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_cl_args $connect_addr > "$cout" & else + connector_in="${cinsent}" tee "$cinsent" < "$cinfail" | \ timeout ${timeout_test} \ ip netns exec ${connector_ns} \ @@ -1057,17 +1057,9 @@ do_transfer() return 1 fi - if [ "$test_linkfail" -gt 1 ];then - check_transfer $sinfail $cout "file received by client" $trunc_size - else - check_transfer $sin $cout "file received by client" $trunc_size - fi + check_transfer $listener_in $cout "file received by client" $trunc_size retc=$? - if [ "$test_linkfail" -eq 0 ];then - check_transfer $cin $sout "file received by server" $trunc_size - else - check_transfer $cinsent $sout "file received by server" $trunc_size - fi + check_transfer $connector_in $sout "file received by server" $trunc_size rets=$? [ $retc -eq 0 ] && [ $rets -eq 0 ] From 5c59df126bae1f2bb6fce6c11eef0e9776b32598 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 1 Nov 2025 18:56:54 +0100 Subject: [PATCH 540/867] selftests: mptcp: join: validate extra bind cases By design, an MPTCP connection will not accept extra subflows where no MPTCP listening sockets can accept such requests. In other words, it means that if the 'server' listens on a specific address / device, it cannot accept MP_JOIN sent to a different address / device. Except if there is another MPTCP listening socket accepting them. 
This is what the new tests are validating: - Forcing a bind on the main v4/v6 address, and checking that MP_JOIN to announced addresses are not accepted. - Also forcing a bind on the main v4/v6 address, but before, another listening socket is created to accept additional subflows. Note that 'mptcpize run nc -l' -- or something else only doing: socket(MPTCP), bind(), listen(0) -- would be enough, but here mptcp_connect is reused not to depend on another tool just for that. - Same as the previous one, but using v6 link-local addresses: this is a bit particular because it is required to specify the outgoing network interface when connecting to a link-local address announced by the other peer. When using the routing rules, this doesn't work (the outgoing interface is not known) ; but it does work with a 'laminar' endpoint having a specified interface. Note that extra small modifications are needed for these tests to work: - mptcp_connect's check_getpeername_connect() check should strip the specified interface when comparing addresses. - With IPv6 link-local addresses, it is required to wait for them to be ready (no longer in 'tentative' mode) before using them, otherwise the bind() will not be allowed. 
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/591 Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-4-b4166772d6bb@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/net/mptcp/mptcp_connect.c | 10 +- .../testing/selftests/net/mptcp/mptcp_join.sh | 153 +++++++++++++++++- 2 files changed, 161 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index b148cadb96d0b..c030b08a71957 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1064,6 +1064,8 @@ static void check_getpeername_connect(int fd) socklen_t salen = sizeof(ss); char a[INET6_ADDRSTRLEN]; char b[INET6_ADDRSTRLEN]; + const char *iface; + size_t len; if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { perror("getpeername"); @@ -1073,7 +1075,13 @@ static void check_getpeername_connect(int fd) xgetnameinfo((struct sockaddr *)&ss, salen, a, sizeof(a), b, sizeof(b)); - if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) + iface = strchr(cfg_host, '%'); + if (iface) + len = iface - cfg_host; + else + len = strlen(cfg_host) + 1; + + if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b)) fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, cfg_host, a, cfg_port, b); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4c9ee094381e5..4faf58fecc94f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -62,6 +62,7 @@ unset sflags unset fastclose unset fullmesh unset speed +unset bind_addr unset join_syn_rej unset join_csum_ns1 unset join_csum_ns2 @@ -645,6 +646,27 @@ wait_mpj() done } +wait_ll_ready() +{ + local ns="${1}" + + local i + for i in $(seq 50); do + ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" | + 
grep -qw "tentative" || break + sleep 0.1 + done +} + +get_ll_addr() +{ + local ns="${1}" + local iface="${2}" + + ip -n "${ns}" -6 addr show dev "${iface}" scope link | + grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#' +} + kill_events_pids() { mptcp_lib_kill_wait $evts_ns1_pid @@ -951,6 +973,7 @@ do_transfer() local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + local bind_addr=${bind_addr:-"::"} local listener_in="${sin}" local connector_in="${cin}" port=$(get_port) @@ -1006,7 +1029,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ - ${extra_srv_args} "::" < "${listener_in}" > "${sout}" & + ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" @@ -3229,6 +3252,133 @@ add_addr_ports_tests() fi } +bind_tests() +{ + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v4, no join v4"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v6, no join v6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v4"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + + # Launching another app listening on a different address + # Note: it could be a 
totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP 10.0.2.1 & + extra_bind=$! + + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v6"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP dead:beef:2::1 & + extra_bind=$! + + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses: v6 LL case + if reset "multiple bind to allow joins v6 link-local routing"; then + local extra_bind ns1ll1 ns1ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + # it is not possible to connect to the announced LL addr without + # specifying the outgoing interface. 
+ join_connect_err=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to v6 LL addresses: laminar + if reset "multiple bind to allow joins v6 link-local laminar" && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + local extra_bind ns1ll1 ns1ll2 ns2ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + ns2ll2="$(get_ll_addr $ns2 ns2eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2 + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi +} + syncookies_tests() { # single subflow, syncookies @@ -4183,6 +4333,7 @@ all_tests_sorted=( M@mixed_tests b@backup_tests p@add_addr_ports_tests + B@bind_tests k@syncookies_tests S@checksum_tests d@deny_join_id0_tests From 284922f4c563aa3a8558a00f2a05722133237fe8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Nov 2025 15:25:20 +0900 Subject: [PATCH 541/867] x86: uaccess: don't use runtime-const rewriting in modules The runtime-const infrastructure was never designed to handle the modular case, because the constant fixup is only done at boot time for core kernel code. But by the time I used it for the x86-64 user space limit handling in commit 86e6b1547b3d ("x86: fix user address masking non-canonical speculation issue"), I had completely repressed that fact. And it all happens to work because the only code that currently actually gets inlined by modules is for the access_ok() limit check, where the default constant value works even when not fixed up. 
Because at least I had intentionally made it be something that is in the non-canonical address space region. But it's technically very wrong, and it does mean that at least in theory, the use of 'access_ok()' + '__get_user()' can trigger the same speculation issue with non-canonical addresses that the original commit was all about. The pattern is unusual enough that this probably doesn't matter in practice, but very wrong is still very wrong. Also, let's fix it before the nice optimized scoped user accessor helpers that Thomas Gleixner is working on cause this pseudo-constant to then be more widely used. This all came up due to an unrelated discussion with Mateusz Guzik about using the runtime const infrastructure for names_cachep accesses too. There the modular case was much more obviously broken, and Mateusz noted it in his 'v2' of the patch series. That then made me notice how broken 'access_ok()' had been in modules all along. Mea culpa, mea maxima culpa. Fix it by simply not using the runtime-const code in modules, and just using the USER_PTR_MAX variable value instead. This is not performance-critical like the core user accessor functions (get_user() and friends) are. Also make sure this doesn't get forgotten the next time somebody wants to do runtime constant optimizations by having the x86 runtime-const.h header file error out if included by modules. 
Fixes: 86e6b1547b3d ("x86: fix user address masking non-canonical speculation issue") Acked-by: Borislav Petkov Acked-by: Sean Christopherson Cc: Thomas Gleixner Triggered-by: Mateusz Guzik Link: https://lore.kernel.org/all/20251030105242.801528-1-mjguzik@gmail.com/ Signed-off-by: Linus Torvalds --- arch/x86/include/asm/runtime-const.h | 4 ++++ arch/x86/include/asm/uaccess_64.h | 10 +++++----- arch/x86/kernel/cpu/common.c | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index 8d983cfd06ea6..e5a13dc8816e2 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -2,6 +2,10 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #ifdef __ASSEMBLY__ .macro RUNTIME_CONST_PTR sym reg diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c8a5ae35c8714..641f45c22f9da 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -12,12 +12,12 @@ #include #include #include -#include -/* - * Virtual variable: there's no actual backing store for this, - * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' - */ +#ifdef MODULE + #define runtime_const_ptr(sym) (sym) +#else + #include +#endif extern unsigned long USER_PTR_MAX; #ifdef CONFIG_ADDRESS_MASKING diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7d3512914ca9..02d97834a1d4d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -78,6 +78,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Used for modules: built-in code uses runtime constants */ +unsigned long USER_PTR_MAX; +EXPORT_SYMBOL(USER_PTR_MAX); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ @@ -2579,7 +2583,7 @@ void __init 
arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { - unsigned long USER_PTR_MAX = TASK_SIZE_MAX; + USER_PTR_MAX = TASK_SIZE_MAX; /* * Enable this when LAM is gated on LASS support From bc7208ca805ae6062f353a4753467d913d963bc6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 3 Nov 2025 16:56:55 -0800 Subject: [PATCH 542/867] bnxt_en: Shutdown FW DMA in bnxt_shutdown() The netif_close() call in bnxt_shutdown() only stops packet DMA. There may be FW DMA for trace logging (recently added) that will continue. If we kexec to a new kernel, the DMA will corrupt memory in the new kernel. Add bnxt_hwrm_func_drv_unrgtr() to unregister the driver from the FW. This will stop the FW DMA. In case the call fails, call pcie_flr() to reset the function and stop the DMA. Fixes: 24d694aec139 ("bnxt_en: Allocate backing store memory for FW trace logs") Reported-by: Jakub Kicinski Reviewed-by: Damodharam Ammepalli Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3fc33b1b4dfb1..c0e9caa1df735 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16892,6 +16892,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) netif_close(dev); + if (bnxt_hwrm_func_drv_unrgtr(bp)) { + pcie_flr(pdev); + goto shutdown_exit; + } bnxt_ptp_clear(bp); bnxt_clear_int_mode(bp); pci_disable_device(pdev); From deb8eb39164382f1f67ef8e8af9176baf5e10f2d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 3 Nov 2025 16:56:56 -0800 Subject: [PATCH 543/867] bnxt_en: Fix a possible memory leak in bnxt_ptp_init In bnxt_ptp_init(), when ptp_clock_register() fails, the driver 
is not freeing the memory allocated for ptp_info->pin_config. Fix it to unconditionally free ptp_info->pin_config in bnxt_ptp_free(). Fixes: caf3eedbcd8d ("bnxt_en: 1PPS support for 5750X family chips") Reviewed-by: Pavan Chebbi Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index db81cf6d5289b..0abaa2bbe3577 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -1051,9 +1051,9 @@ static void bnxt_ptp_free(struct bnxt *bp) if (ptp->ptp_clock) { ptp_clock_unregister(ptp->ptp_clock); ptp->ptp_clock = NULL; - kfree(ptp->ptp_info.pin_config); - ptp->ptp_info.pin_config = NULL; } + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; } int bnxt_ptp_init(struct bnxt *bp) From ff02be05f78399c766be68ab0b2285ff90b2aaa8 Mon Sep 17 00:00:00 2001 From: Gautam R A Date: Mon, 3 Nov 2025 16:56:57 -0800 Subject: [PATCH 544/867] bnxt_en: Fix null pointer dereference in bnxt_bs_trace_check_wrap() With older FW, we may get the ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER for FW trace data type that has not been initialized. This will result in a crash in bnxt_bs_trace_check_wrap(). Add a guard to check for a valid magic_byte pointer before proceeding.
Fixes: 84fcd9449fd7 ("bnxt_en: Manage the FW trace context memory") Reviewed-by: Somnath Kotur Reviewed-by: Shruti Parab Signed-off-by: Gautam R A Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 741b2d854789b..7df46a21dd185 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2149,7 +2149,7 @@ struct bnxt_bs_trace_info { static inline void bnxt_bs_trace_check_wrap(struct bnxt_bs_trace_info *bs_trace, u32 offset) { - if (!bs_trace->wrapped && + if (!bs_trace->wrapped && bs_trace->magic_byte && *bs_trace->magic_byte != BNXT_TRACE_BUF_MAGIC_BYTE) bs_trace->wrapped = 1; bs_trace->last_offset = offset; From 28d9a84ef0ce56cc623da2a1ebf7583c00d52b31 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 3 Nov 2025 16:56:58 -0800 Subject: [PATCH 545/867] bnxt_en: Always provide max entry and entry size in coredump segments While populating firmware host logging segments for the coredump, it is possible for the FW command that flushes the segment to fail. When that happens, the existing code will not update the max entry and entry size in the segment header and this causes software that decodes the coredump to skip the segment. The segment most likely has already collected some DMA data, so always update these 2 segment fields in the header to allow the decoder to decode any data in the segment. 
Fixes: 3c2179e66355 ("bnxt_en: Add FW trace coredump segments to the coredump") Reviewed-by: Shruti Parab Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 0181ab1f2dfdc..ccb8b509662dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -333,13 +333,14 @@ static void bnxt_fill_drv_seg_record(struct bnxt *bp, u32 offset = 0; int rc = 0; + record->max_entries = cpu_to_le32(ctxm->max_entries); + record->entry_size = cpu_to_le32(ctxm->entry_size); + rc = bnxt_dbg_hwrm_log_buffer_flush(bp, type, 0, &offset); if (rc) return; bnxt_bs_trace_check_wrap(bs_trace, offset); - record->max_entries = cpu_to_le32(ctxm->max_entries); - record->entry_size = cpu_to_le32(ctxm->entry_size); record->offset = cpu_to_le32(bs_trace->last_offset); record->wrapped = bs_trace->wrapped; } From 5204943a4c6efc832993c0fa17dec275071eeccc Mon Sep 17 00:00:00 2001 From: Shantiprasad Shettar Date: Mon, 3 Nov 2025 16:56:59 -0800 Subject: [PATCH 546/867] bnxt_en: Fix warning in bnxt_dl_reload_down() The existing code calls bnxt_cancel_reservations() after bnxt_hwrm_func_drv_unrgtr() in bnxt_dl_reload_down(). bnxt_cancel_reservations() calls the FW and it will always fail since the driver has already unregistered, triggering this warning: bnxt_en 0000:0a:00.0 ens2np0: resc_qcaps failed Fix it by calling bnxt_clear_reservations() which will skip the unnecessary FW call since we have unregistered. 
Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Reviewed-by: Mohammad Shuab Siddique Reviewed-by: Somnath Kotur Reviewed-by: Kalesh AP Signed-off-by: Shantiprasad Shettar Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c0e9caa1df735..a625e7c311dd7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12439,7 +12439,7 @@ static int bnxt_try_recover_fw(struct bnxt *bp) return -ENODEV; } -static void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7df46a21dd185..3613a172483a1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2941,6 +2941,7 @@ void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset); int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 02961d93ed35d..67ca02d84c979 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ 
-461,7 +461,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, rtnl_unlock(); break; } - bnxt_cancel_reservations(bp, false); + bnxt_clear_reservations(bp, false); bnxt_free_ctx_mem(bp, false); break; } From f2143e283c6b993f4ad8b85a45aa16ac899f1abc Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 3 Nov 2025 15:20:46 +0800 Subject: [PATCH 547/867] net: devmem: Remove unused declaration net_devmem_bind_tx_release() Commit bd61848900bf ("net: devmem: Implement TX path") declared this but never implemented it. Signed-off-by: Yue Haibing Acked-by: Stanislav Fomichev Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20251103072046.1670574-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/core/devmem.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/devmem.h b/net/core/devmem.h index 101150d761af2..0b43a648cd2ef 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -94,7 +94,6 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack); -void net_devmem_bind_tx_release(struct sock *sk); static inline struct dmabuf_genpool_chunk_owner * net_devmem_iov_to_chunk_owner(const struct net_iov *niov) From e0c78fcad2bb04651af2ad40ed20714501931d7a Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Mon, 3 Nov 2025 09:13:42 +0100 Subject: [PATCH 548/867] dt-bindings: net: ethernet-phy: clarify when compatible must specify PHY ID Change PHY ID description in ethernet-phy.yaml to clarify that a PHY ID is required (may -> must) when the PHY requires special initialization sequence. 
Link: https://lore.kernel.org/netdev/20251026212026.GA2959311-robh@kernel.org/ Link: https://lore.kernel.org/netdev/aQIZvDt5gooZSTcp@debianbuilder/ Signed-off-by: Buday Csaba Acked-by: Conor Dooley Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/64c52d1a726944a68a308355433e8ef0f82c4240.1762157515.git.buday.csaba@prolan.hu Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/ethernet-phy.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 2ec2d9fda7e30..bb4c49fc5fd88 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -35,9 +35,13 @@ properties: description: PHYs that implement IEEE802.3 clause 45 - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$" description: - If the PHY reports an incorrect ID (or none at all) then the - compatible list may contain an entry with the correct PHY ID - in the above form. + PHYs contain identification registers. These will be read to + identify the PHY. If the PHY reports an incorrect ID, or the + PHY requires a specific initialization sequence (like a + particular order of clocks, resets, power supplies), in + order to be able to read the ID registers, then the + compatible list must contain an entry with the correct PHY + ID in the above form. The first group of digits is the 16 bit Phy Identifier 1 register, this is the chip vendor OUI bits 3:18. The second group of digits is the Phy Identifier 2 register, From 2b38447548813ccdb8ad79385094a35d77c01e3b Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 3 Nov 2025 15:09:47 +0000 Subject: [PATCH 549/867] net: liquidio: convert to use ndo_hwtstamp callbacks The driver implemented SIOCSHWTSTAMP ioctl command only, but there is a way to get configured status. 
Implement both ndo_hwtstamp_set and ndo_hwtstamp_get callbacks. Reviewed-by: Kory Maincent Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251103150952.3538205-3-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/cavium/liquidio/lio_main.c | 50 ++++++++----------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 8e2fcec26ea13..0732440eeacd6 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2107,20 +2107,16 @@ liquidio_get_stats64(struct net_device *netdev, lstats->tx_fifo_errors; } -/** - * hwtstamp_ioctl - Handler for SIOCSHWTSTAMP ioctl - * @netdev: network device - * @ifr: interface request - */ -static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) +static int liquidio_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *conf, + struct netlink_ext_ack *extack) { - struct hwtstamp_config conf; struct lio *lio = GET_LIO(netdev); - if (copy_from_user(&conf, ifr->ifr_data, sizeof(conf))) - return -EFAULT; + if (!lio->oct_dev->ptp_enable) + return -EOPNOTSUPP; - switch (conf.tx_type) { + switch (conf->tx_type) { case HWTSTAMP_TX_ON: case HWTSTAMP_TX_OFF: break; @@ -2128,7 +2124,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) return -ERANGE; } - switch (conf.rx_filter) { + switch (conf->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case HWTSTAMP_FILTER_ALL: @@ -2146,39 +2142,32 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - conf.rx_filter = HWTSTAMP_FILTER_ALL; + conf->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } - if (conf.rx_filter == HWTSTAMP_FILTER_ALL) + if (conf->rx_filter == HWTSTAMP_FILTER_ALL) ifstate_set(lio, 
LIO_IFSTATE_RX_TIMESTAMP_ENABLED); else ifstate_reset(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED); - return copy_to_user(ifr->ifr_data, &conf, sizeof(conf)) ? -EFAULT : 0; + return 0; } -/** - * liquidio_ioctl - ioctl handler - * @netdev: network device - * @ifr: interface request - * @cmd: command - */ -static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int liquidio_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *conf) { struct lio *lio = GET_LIO(netdev); - switch (cmd) { - case SIOCSHWTSTAMP: - if (lio->oct_dev->ptp_enable) - return hwtstamp_ioctl(netdev, ifr); - fallthrough; - default: - return -EOPNOTSUPP; - } + /* TX timestamping is technically always on */ + conf->tx_type = HWTSTAMP_TX_ON; + conf->rx_filter = ifstate_check(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED) ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + + return 0; } /** @@ -3227,7 +3216,6 @@ static const struct net_device_ops lionetdevops = { .ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid, .ndo_change_mtu = liquidio_change_mtu, - .ndo_eth_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, .ndo_set_vf_mac = liquidio_set_vf_mac, @@ -3238,6 +3226,8 @@ static const struct net_device_ops lionetdevops = { .ndo_set_vf_link_state = liquidio_set_vf_link_state, .ndo_get_vf_stats = liquidio_get_vf_stats, .ndo_get_port_parent_id = liquidio_get_port_parent_id, + .ndo_hwtstamp_get = liquidio_hwtstamp_get, + .ndo_hwtstamp_set = liquidio_hwtstamp_set, }; /** From 94037a0e18e3340912a039fa8435a69bee50bfd1 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 3 Nov 2025 15:09:48 +0000 Subject: [PATCH 550/867] net: liquidio_vf: convert to use ndo_hwtstamp callbacks The driver implemented SIOCSHWTSTAMP ioctl command only, but there is a way to get configuration back. Implement both ndo_hwtstamp_set and ndo_hwtstamp_get callbacks.
Reviewed-by: Kory Maincent Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251103150952.3538205-4-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../ethernet/cavium/liquidio/lio_vf_main.c | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 3230dff5ba056..e02942dbbcce5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1236,20 +1236,13 @@ liquidio_get_stats64(struct net_device *netdev, lstats->tx_carrier_errors; } -/** - * hwtstamp_ioctl - Handler for SIOCSHWTSTAMP ioctl - * @netdev: network device - * @ifr: interface request - */ -static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) +static int liquidio_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *conf, + struct netlink_ext_ack *extack) { struct lio *lio = GET_LIO(netdev); - struct hwtstamp_config conf; - - if (copy_from_user(&conf, ifr->ifr_data, sizeof(conf))) - return -EFAULT; - switch (conf.tx_type) { + switch (conf->tx_type) { case HWTSTAMP_TX_ON: case HWTSTAMP_TX_OFF: break; @@ -1257,7 +1250,7 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) return -ERANGE; } - switch (conf.rx_filter) { + switch (conf->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case HWTSTAMP_FILTER_ALL: @@ -1275,35 +1268,31 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - conf.rx_filter = HWTSTAMP_FILTER_ALL; + conf->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } - if (conf.rx_filter == HWTSTAMP_FILTER_ALL) + if (conf->rx_filter == HWTSTAMP_FILTER_ALL) ifstate_set(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED); else ifstate_reset(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED); - return 
copy_to_user(ifr->ifr_data, &conf, sizeof(conf)) ? -EFAULT : 0; + return 0; } -/** - * liquidio_ioctl - ioctl handler - * @netdev: network device - * @ifr: interface request - * @cmd: command - */ -static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int liquidio_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *conf) { - switch (cmd) { - case SIOCSHWTSTAMP: - return hwtstamp_ioctl(netdev, ifr); - default: - return -EOPNOTSUPP; - } + struct lio *lio = GET_LIO(netdev); + + /* TX timestamping is technically always on */ + conf->tx_type = HWTSTAMP_TX_ON; + conf->rx_filter = ifstate_check(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED) ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + return 0; } static void handle_timestamp(struct octeon_device *oct, u32 status, void *buf) @@ -1881,9 +1870,10 @@ static const struct net_device_ops lionetdevops = { .ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid, .ndo_change_mtu = liquidio_change_mtu, - .ndo_eth_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, + .ndo_hwtstamp_get = liquidio_hwtstamp_get, + .ndo_hwtstamp_set = liquidio_hwtstamp_set, }; static int lio_nic_info(struct octeon_recv_info *recv_info, void *buf) From 72c35e3a9589005e6ba55d63902eeb8b9fdb3a4e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 3 Nov 2025 15:09:49 +0000 Subject: [PATCH 551/867] net: octeon: mgmt: convert to use ndo_hwtstamp callbacks The driver implemented SIOCSHWTSTAMP ioctl command only. But it stores timestamping configuration, so it is possible to report it to users. Implement both ndo_hwtstamp_set and ndo_hwtstamp_get callbacks. After this the ndo_eth_ioctl effectively becomes phy_do_ioctl - adjust callback accordingly.
Reviewed-by: Kory Maincent Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251103150952.3538205-5-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/cavium/octeon/octeon_mgmt.c | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 393b9951490a1..c190fc6538d4c 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -690,19 +690,16 @@ static irqreturn_t octeon_mgmt_interrupt(int cpl, void *dev_id) return IRQ_HANDLED; } -static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, - struct ifreq *rq, int cmd) +static int octeon_mgmt_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct octeon_mgmt *p = netdev_priv(netdev); - struct hwtstamp_config config; - union cvmx_mio_ptp_clock_cfg ptp; union cvmx_agl_gmx_rxx_frm_ctl rxx_frm_ctl; + union cvmx_mio_ptp_clock_cfg ptp; bool have_hw_timestamps = false; - if (copy_from_user(&config, rq->ifr_data, sizeof(config))) - return -EFAULT; - - /* Check the status of hardware for tiemstamps */ + /* Check the status of hardware for timestamps */ if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) { /* Get the current state of the PTP clock */ ptp.u64 = cvmx_read_csr(CVMX_MIO_PTP_CLOCK_CFG); @@ -733,10 +730,12 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, have_hw_timestamps = true; } - if (!have_hw_timestamps) + if (!have_hw_timestamps) { + NL_SET_ERR_MSG_MOD(extack, "HW doesn't support timestamping"); return -EINVAL; + } - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: break; @@ -744,7 +743,7 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case 
HWTSTAMP_FILTER_NONE: p->has_rx_tstamp = false; rxx_frm_ctl.u64 = cvmx_read_csr(p->agl + AGL_GMX_RX_FRM_CTL); @@ -766,33 +765,34 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - p->has_rx_tstamp = have_hw_timestamps; - config.rx_filter = HWTSTAMP_FILTER_ALL; - if (p->has_rx_tstamp) { - rxx_frm_ctl.u64 = cvmx_read_csr(p->agl + AGL_GMX_RX_FRM_CTL); - rxx_frm_ctl.s.ptp_mode = 1; - cvmx_write_csr(p->agl + AGL_GMX_RX_FRM_CTL, rxx_frm_ctl.u64); - } + p->has_rx_tstamp = true; + config->rx_filter = HWTSTAMP_FILTER_ALL; + rxx_frm_ctl.u64 = cvmx_read_csr(p->agl + AGL_GMX_RX_FRM_CTL); + rxx_frm_ctl.s.ptp_mode = 1; + cvmx_write_csr(p->agl + AGL_GMX_RX_FRM_CTL, rxx_frm_ctl.u64); break; default: return -ERANGE; } - if (copy_to_user(rq->ifr_data, &config, sizeof(config))) - return -EFAULT; - return 0; } -static int octeon_mgmt_ioctl(struct net_device *netdev, - struct ifreq *rq, int cmd) +static int octeon_mgmt_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - switch (cmd) { - case SIOCSHWTSTAMP: - return octeon_mgmt_ioctl_hwtstamp(netdev, rq, cmd); - default: - return phy_do_ioctl(netdev, rq, cmd); - } + struct octeon_mgmt *p = netdev_priv(netdev); + + /* Check the status of hardware for timestamps */ + if (!OCTEON_IS_MODEL(OCTEON_CN6XXX)) + return -EINVAL; + + config->tx_type = HWTSTAMP_TX_ON; + config->rx_filter = p->has_rx_tstamp ? 
+ HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; + + return 0; } static void octeon_mgmt_disable_link(struct octeon_mgmt *p) @@ -1370,11 +1370,13 @@ static const struct net_device_ops octeon_mgmt_ops = { .ndo_start_xmit = octeon_mgmt_xmit, .ndo_set_rx_mode = octeon_mgmt_set_rx_filtering, .ndo_set_mac_address = octeon_mgmt_set_mac_address, - .ndo_eth_ioctl = octeon_mgmt_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_change_mtu = octeon_mgmt_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = octeon_mgmt_poll_controller, #endif + .ndo_hwtstamp_get = octeon_mgmt_hwtstamp_get, + .ndo_hwtstamp_set = octeon_mgmt_hwtstamp_set, }; static int octeon_mgmt_probe(struct platform_device *pdev) From a23d0486d05a7b6d816d55099a7574c3c4ca4869 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 3 Nov 2025 15:09:50 +0000 Subject: [PATCH 552/867] net: thunderx: convert to use ndo_hwtstamp callbacks The driver implemented SIOCSHWTSTAMP ioctl command only, but it also stores configuration in private data, so it's possible to report it back to users. Implement both ndo_hwtstamp_set and ndo_hwtstamp_get callbacks. 
Reviewed-by: Kory Maincent Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20251103150952.3538205-6-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/cavium/thunder/nicvf_main.c | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 1be2dc40a1a63..0b6e30a8feb07 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1899,18 +1899,18 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) } } -static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) +static int nicvf_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; struct nicvf *nic = netdev_priv(netdev); - if (!nic->ptp_clock) + if (!nic->ptp_clock) { + NL_SET_ERR_MSG_MOD(extack, "HW timestamping is not supported"); return -ENODEV; + } - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: break; @@ -1918,7 +1918,7 @@ static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: nic->hw_rx_tstamp = false; break; @@ -1937,7 +1937,7 @@ static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: nic->hw_rx_tstamp = true; - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; @@ -1946,20 +1946,24 @@ static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) if (netif_running(netdev)) nicvf_config_hw_rx_tstamp(nic, 
nic->hw_rx_tstamp); - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; - return 0; } -static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) +static int nicvf_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - switch (cmd) { - case SIOCSHWTSTAMP: - return nicvf_config_hwtstamp(netdev, req); - default: - return -EOPNOTSUPP; - } + struct nicvf *nic = netdev_priv(netdev); + + if (!nic->ptp_clock) + return -ENODEV; + + /* TX timestamping is technically always on */ + config->tx_type = HWTSTAMP_TX_ON; + config->rx_filter = nic->hw_rx_tstamp ? + HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; + + return 0; } static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, @@ -2081,8 +2085,9 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, .ndo_bpf = nicvf_xdp, - .ndo_eth_ioctl = nicvf_ioctl, .ndo_set_rx_mode = nicvf_set_rx_mode, + .ndo_hwtstamp_get = nicvf_hwtstamp_get, + .ndo_hwtstamp_set = nicvf_hwtstamp_set, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) From d8fdc7069474a298ec63ee338b5fcc879a33c754 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 3 Nov 2025 15:09:51 +0000 Subject: [PATCH 553/867] net: pch_gbe: convert to use ndo_hwtstamp callbacks The driver implemented SIOCSHWTSTAMP ioctl command only, but it stores configuration in the private data, so it is possible to report it back to users. Implement both ndo_hwtstamp_set and ndo_hwtstamp_get callbacks. To properly report RX filter type, store it in hwts_rx_en instead of using this field as a simple flag. The logic didn't change because receive path used this field as boolean flag. 
Signed-off-by: Vadim Fedorenko Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20251103150952.3538205-7-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index e5a6f59af0b63..62f05f4569b10 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -198,23 +198,21 @@ pch_tx_timestamp(struct pch_gbe_adapter *adapter, struct sk_buff *skb) pch_ch_event_write(pdev, TX_SNAPSHOT_LOCKED); } -static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int pch_gbe_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - struct hwtstamp_config cfg; struct pch_gbe_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev; u8 station[20]; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - /* Get ieee1588's dev information */ pdev = adapter->ptp_pdev; - if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON) + if (cfg->tx_type != HWTSTAMP_TX_OFF && cfg->tx_type != HWTSTAMP_TX_ON) return -ERANGE; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: adapter->hwts_rx_en = 0; break; @@ -223,17 +221,17 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) pch_ch_control_write(pdev, SLAVE_MODE | CAP_MODE0); break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - adapter->hwts_rx_en = 1; + adapter->hwts_rx_en = cfg->rx_filter; pch_ch_control_write(pdev, MASTER_MODE | CAP_MODE0); break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - adapter->hwts_rx_en = 1; + adapter->hwts_rx_en = cfg->rx_filter; pch_ch_control_write(pdev, V2_MODE | CAP_MODE2); strcpy(station, PTP_L4_MULTICAST_SA); 
pch_set_station_address(station, pdev); break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - adapter->hwts_rx_en = 1; + adapter->hwts_rx_en = cfg->rx_filter; pch_ch_control_write(pdev, V2_MODE | CAP_MODE2); strcpy(station, PTP_L2_MULTICAST_SA); pch_set_station_address(station, pdev); @@ -242,12 +240,23 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) return -ERANGE; } - adapter->hwts_tx_en = cfg.tx_type == HWTSTAMP_TX_ON; + adapter->hwts_tx_en = cfg->tx_type == HWTSTAMP_TX_ON; /* Clear out any old time stamps. */ pch_ch_event_write(pdev, TX_SNAPSHOT_LOCKED | RX_SNAPSHOT_LOCKED); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; +} + +static int pch_gbe_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *cfg) +{ + struct pch_gbe_adapter *adapter = netdev_priv(netdev); + + cfg->tx_type = adapter->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + cfg->rx_filter = adapter->hwts_rx_en; + + return 0; } static inline void pch_gbe_mac_load_mac_addr(struct pch_gbe_hw *hw) @@ -2234,9 +2243,6 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) netdev_dbg(netdev, "cmd : 0x%04x\n", cmd); - if (cmd == SIOCSHWTSTAMP) - return hwtstamp_ioctl(netdev, ifr, cmd); - return generic_mii_ioctl(&adapter->mii, if_mii(ifr), cmd, NULL); } @@ -2328,6 +2334,8 @@ static const struct net_device_ops pch_gbe_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = pch_gbe_netpoll, #endif + .ndo_hwtstamp_get = pch_gbe_hwtstamp_get, + .ndo_hwtstamp_set = pch_gbe_hwtstamp_set, }; static pci_ers_result_t pch_gbe_io_error_detected(struct pci_dev *pdev, From 3f02b82725576a85a1219547e28a2ab30b53666f Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 3 Nov 2025 17:29:02 +0000 Subject: [PATCH 554/867] ti: netcp: convert to ndo_hwtstamp callbacks Convert TI NetCP driver to use ndo_hwtstamp_get()/ndo_hwtstamp_set() callbacks. 
The logic is slightly changed, because I believe the original logic was not really correct. Config reading part is using the very first module to get the configuration instead of iterating over all of them and keep the last one as the configuration is supposed to be identical for all modules. HW timestamp config set path is now trying to configure all modules, but in case of error from one module it adds extack message. This way the configuration will be as synchronized as possible. There are only 2 modules using netcp core infrastructure, and both use the very same function to configure HW timestamping, so no actual difference in behavior is expected. Signed-off-by: Vadim Fedorenko Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20251103172902.3538392-1-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/netcp.h | 5 ++ drivers/net/ethernet/ti/netcp_core.c | 58 +++++++++++++++++++++ drivers/net/ethernet/ti/netcp_ethss.c | 72 +++++++++++++++------------ 3 files changed, 103 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index 7007eb8bed365..b9cbd3b4a8a22 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -207,6 +207,11 @@ struct netcp_module { int (*del_vid)(void *intf_priv, int vid); int (*ioctl)(void *intf_priv, struct ifreq *req, int cmd); int (*set_rx_mode)(void *intf_priv, bool promisc); + int (*hwtstamp_get)(void *intf_priv, + struct kernel_hwtstamp_config *cfg); + int (*hwtstamp_set)(void *intf_priv, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); /* used internally */ struct list_head module_list; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 857820657bac5..cee2686a48935 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1781,6 +1781,62 @@ static int netcp_ndo_stop(struct net_device *ndev) return 0; } 
+static int netcp_ndo_hwtstamp_get(struct net_device *ndev, + struct kernel_hwtstamp_config *config) +{ + struct netcp_intf *netcp = netdev_priv(ndev); + struct netcp_intf_modpriv *intf_modpriv; + struct netcp_module *module; + int err = -EOPNOTSUPP; + + if (!netif_running(ndev)) + return -EINVAL; + + for_each_module(netcp, intf_modpriv) { + module = intf_modpriv->netcp_module; + if (!module->hwtstamp_get) + continue; + + err = module->hwtstamp_get(intf_modpriv->module_priv, config); + break; + } + + return err; +} + +static int netcp_ndo_hwtstamp_set(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct netcp_intf *netcp = netdev_priv(ndev); + struct netcp_intf_modpriv *intf_modpriv; + struct netcp_module *module; + int ret = -1, err = -EOPNOTSUPP; + + if (!netif_running(ndev)) + return -EINVAL; + + for_each_module(netcp, intf_modpriv) { + module = intf_modpriv->netcp_module; + if (!module->hwtstamp_set) + continue; + + err = module->hwtstamp_set(intf_modpriv->module_priv, config, + extack); + if ((err < 0) && (err != -EOPNOTSUPP)) { + NL_SET_ERR_MSG_WEAK_MOD(extack, + "At least one module failed to setup HW timestamps"); + ret = err; + goto out; + } + if (err == 0) + ret = err; + } + +out: + return (ret == 0) ? 
0 : err; +} + static int netcp_ndo_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) { @@ -1952,6 +2008,8 @@ static const struct net_device_ops netcp_netdev_ops = { .ndo_tx_timeout = netcp_ndo_tx_timeout, .ndo_select_queue = dev_pick_tx_zero, .ndo_setup_tc = netcp_setup_tc, + .ndo_hwtstamp_get = netcp_ndo_hwtstamp_get, + .ndo_hwtstamp_set = netcp_ndo_hwtstamp_set, }; static int netcp_create_interface(struct netcp_device *netcp_device, diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 55a1a96cd8349..0ae44112812cf 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2591,20 +2591,26 @@ static int gbe_rxtstamp(struct gbe_intf *gbe_intf, struct netcp_packet *p_info) return 0; } -static int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, struct ifreq *ifr) +static int gbe_hwtstamp_get(void *intf_priv, struct kernel_hwtstamp_config *cfg) { - struct gbe_priv *gbe_dev = gbe_intf->gbe_dev; - struct cpts *cpts = gbe_dev->cpts; - struct hwtstamp_config cfg; + struct gbe_intf *gbe_intf = intf_priv; + struct gbe_priv *gbe_dev; + struct phy_device *phy; + + gbe_dev = gbe_intf->gbe_dev; - if (!cpts) + if (!gbe_dev->cpts) + return -EOPNOTSUPP; + + phy = gbe_intf->slave->phy; + if (phy_has_hwtstamp(phy)) return -EOPNOTSUPP; - cfg.flags = 0; - cfg.tx_type = gbe_dev->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - cfg.rx_filter = gbe_dev->rx_ts_enabled; + cfg->flags = 0; + cfg->tx_type = gbe_dev->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + cfg->rx_filter = gbe_dev->rx_ts_enabled; - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static void gbe_hwtstamp(struct gbe_intf *gbe_intf) @@ -2637,19 +2643,23 @@ static void gbe_hwtstamp(struct gbe_intf *gbe_intf) writel(ctl, GBE_REG_ADDR(slave, port_regs, ts_ctl_ltype2)); } -static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) +static int gbe_hwtstamp_set(void *intf_priv, struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - struct gbe_priv *gbe_dev = gbe_intf->gbe_dev; - struct cpts *cpts = gbe_dev->cpts; - struct hwtstamp_config cfg; + struct gbe_intf *gbe_intf = intf_priv; + struct gbe_priv *gbe_dev; + struct phy_device *phy; - if (!cpts) + gbe_dev = gbe_intf->gbe_dev; + + if (!gbe_dev->cpts) return -EOPNOTSUPP; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; + phy = gbe_intf->slave->phy; + if (phy_has_hwtstamp(phy)) + return phy->mii_ts->hwtstamp(phy->mii_ts, cfg, extack); - switch (cfg.tx_type) { + switch (cfg->tx_type) { case HWTSTAMP_TX_OFF: gbe_dev->tx_ts_enabled = 0; break; @@ -2660,7 +2670,7 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) return -ERANGE; } - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: gbe_dev->rx_ts_enabled = HWTSTAMP_FILTER_NONE; break; @@ -2668,7 +2678,7 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: gbe_dev->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: @@ -2680,7 +2690,7 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: gbe_dev->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V2_EVENT; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; 
break; default: return -ERANGE; @@ -2688,7 +2698,7 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) gbe_hwtstamp(gbe_intf); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static void gbe_register_cpts(struct gbe_priv *gbe_dev) @@ -2745,12 +2755,15 @@ static inline void gbe_unregister_cpts(struct gbe_priv *gbe_dev) { } -static inline int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, struct ifreq *req) +static inline int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, + struct kernel_hwtstamp_config *cfg) { return -EOPNOTSUPP; } -static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req) +static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -2816,15 +2829,6 @@ static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd) struct gbe_intf *gbe_intf = intf_priv; struct phy_device *phy = gbe_intf->slave->phy; - if (!phy_has_hwtstamp(phy)) { - switch (cmd) { - case SIOCGHWTSTAMP: - return gbe_hwtstamp_get(gbe_intf, req); - case SIOCSHWTSTAMP: - return gbe_hwtstamp_set(gbe_intf, req); - } - } - if (phy) return phy_mii_ioctl(phy, req, cmd); @@ -3824,6 +3828,8 @@ static struct netcp_module gbe_module = { .add_vid = gbe_add_vid, .del_vid = gbe_del_vid, .ioctl = gbe_ioctl, + .hwtstamp_get = gbe_hwtstamp_get, + .hwtstamp_set = gbe_hwtstamp_set, }; static struct netcp_module xgbe_module = { @@ -3841,6 +3847,8 @@ static struct netcp_module xgbe_module = { .add_vid = gbe_add_vid, .del_vid = gbe_del_vid, .ioctl = gbe_ioctl, + .hwtstamp_get = gbe_hwtstamp_get, + .hwtstamp_set = gbe_hwtstamp_set, }; static int __init keystone_gbe_init(void) From ee61c10cd4820e8844dba4315f2d1e522f1f3b98 Mon Sep 17 00:00:00 2001 From: Dong Yibo Date: Sat, 1 Nov 2025 09:38:45 +0800 Subject: [PATCH 555/867] net: rnpgbe: Add build support for rnpgbe Add build options and doc for mucse. 
Initialize pci device access for MUCSE devices. Signed-off-by: Dong Yibo Reviewed-by: Andrew Lunn Reviewed-by: Vadim Fedorenko Reviewed-by: MD Danish Anwar Link: https://patch.msgid.link/20251101013849.120565-2-dong100@mucse.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/index.rst | 1 + .../device_drivers/ethernet/mucse/rnpgbe.rst | 17 +++ MAINTAINERS | 8 ++ drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/mucse/Kconfig | 33 +++++ drivers/net/ethernet/mucse/Makefile | 7 + drivers/net/ethernet/mucse/rnpgbe/Makefile | 8 ++ drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 18 +++ .../net/ethernet/mucse/rnpgbe/rnpgbe_main.c | 120 ++++++++++++++++++ 10 files changed, 214 insertions(+) create mode 100644 Documentation/networking/device_drivers/ethernet/mucse/rnpgbe.rst create mode 100644 drivers/net/ethernet/mucse/Kconfig create mode 100644 drivers/net/ethernet/mucse/Makefile create mode 100644 drivers/net/ethernet/mucse/rnpgbe/Makefile create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 7cfcd183054f8..bcc02355f828b 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -47,6 +47,7 @@ Contents: mellanox/mlx5/index meta/fbnic microsoft/netvsc + mucse/rnpgbe neterion/s2io netronome/nfp pensando/ionic diff --git a/Documentation/networking/device_drivers/ethernet/mucse/rnpgbe.rst b/Documentation/networking/device_drivers/ethernet/mucse/rnpgbe.rst new file mode 100644 index 0000000000000..d35cf8a46b6cd --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/mucse/rnpgbe.rst @@ -0,0 +1,17 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +=========================================================== +Linux Base Driver for MUCSE(R) Gigabit PCI Express Adapters +=========================================================== + +Contents +======== + +- Identifying Your Adapter + +Identifying Your Adapter +======================== +The driver is compatible with devices based on the following: + + * MUCSE(R) Ethernet Controller N210 series + * MUCSE(R) Ethernet Controller N500 series diff --git a/MAINTAINERS b/MAINTAINERS index 1ab7e87462993..bdf0a3a0dd36e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17610,6 +17610,14 @@ T: git git://linuxtv.org/media.git F: Documentation/devicetree/bindings/media/i2c/aptina,mt9v111.yaml F: drivers/media/i2c/mt9v111.c +MUCSE ETHERNET DRIVER +M: Yibo Dong +L: netdev@vger.kernel.org +S: Maintained +W: https://www.mucse.com/en/ +F: Documentation/networking/device_drivers/ethernet/mucse/ +F: drivers/net/ethernet/mucse/ + MULTIFUNCTION DEVICES (MFD) M: Lee Jones S: Maintained diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index aead145dd91d1..4a1b368ca7e61 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -129,6 +129,7 @@ source "drivers/net/ethernet/microchip/Kconfig" source "drivers/net/ethernet/mscc/Kconfig" source "drivers/net/ethernet/microsoft/Kconfig" source "drivers/net/ethernet/moxa/Kconfig" +source "drivers/net/ethernet/mucse/Kconfig" source "drivers/net/ethernet/myricom/Kconfig" config FEALNX diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 998dd628b202c..2e18df8ca8ec5 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_NET_VENDOR_MICREL) += micrel/ obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/ obj-$(CONFIG_NET_VENDOR_MICROSEMI) += mscc/ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/ +obj-$(CONFIG_NET_VENDOR_MUCSE) += mucse/ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ obj-$(CONFIG_FEALNX) += 
fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ diff --git a/drivers/net/ethernet/mucse/Kconfig b/drivers/net/ethernet/mucse/Kconfig new file mode 100644 index 0000000000000..0b3e853d625f1 --- /dev/null +++ b/drivers/net/ethernet/mucse/Kconfig @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Mucse network device configuration +# + +config NET_VENDOR_MUCSE + bool "Mucse devices" + default y + help + If you have a network (Ethernet) card from Mucse(R), say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Mucse(R) cards. If you say Y, you will + be asked for your specific card in the following questions. + +if NET_VENDOR_MUCSE + +config MGBE + tristate "Mucse(R) 1GbE PCI Express adapters support" + depends on PCI + help + This driver supports Mucse(R) 1GbE PCI Express family of + adapters. + + More specific information on configuring the driver is in + <file:Documentation/networking/device_drivers/ethernet/mucse/rnpgbe.rst>. + + To compile this driver as a module, choose M here. The module + will be called rnpgbe. + +endif # NET_VENDOR_MUCSE + diff --git a/drivers/net/ethernet/mucse/Makefile b/drivers/net/ethernet/mucse/Makefile new file mode 100644 index 0000000000000..675173fa05f74 --- /dev/null +++ b/drivers/net/ethernet/mucse/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2020 - 2025 MUCSE Corporation. +# +# Makefile for the MUCSE(R) network device drivers +# + +obj-$(CONFIG_MGBE) += rnpgbe/ diff --git a/drivers/net/ethernet/mucse/rnpgbe/Makefile b/drivers/net/ethernet/mucse/rnpgbe/Makefile new file mode 100644 index 0000000000000..9df536f0d04c2 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2020 - 2025 MUCSE Corporation.
+# +# Makefile for the MUCSE(R) 1GbE PCI Express ethernet driver +# + +obj-$(CONFIG_MGBE) += rnpgbe.o +rnpgbe-objs := rnpgbe_main.o diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h new file mode 100644 index 0000000000000..d3439d28c6544 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 - 2025 Mucse Corporation. */ + +#ifndef _RNPGBE_H +#define _RNPGBE_H + +enum rnpgbe_boards { + board_n500, + board_n210 +}; + +/* Device IDs */ +#define PCI_VENDOR_ID_MUCSE 0x8848 +#define RNPGBE_DEVICE_ID_N500_QUAD_PORT 0x8308 +#define RNPGBE_DEVICE_ID_N500_DUAL_PORT 0x8318 +#define RNPGBE_DEVICE_ID_N210 0x8208 +#define RNPGBE_DEVICE_ID_N210L 0x820a +#endif /* _RNPGBE_H */ diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c new file mode 100644 index 0000000000000..019e819fb497b --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 - 2025 Mucse Corporation. */ + +#include + +#include "rnpgbe.h" + +static const char rnpgbe_driver_name[] = "rnpgbe"; + +/* rnpgbe_pci_tbl - PCI Device ID Table + * + * { PCI_VDEVICE(Vendor ID, Device ID), + * private_data (used for different hw chip) } + */ +static struct pci_device_id rnpgbe_pci_tbl[] = { + { PCI_VDEVICE(MUCSE, RNPGBE_DEVICE_ID_N210), board_n210 }, + { PCI_VDEVICE(MUCSE, RNPGBE_DEVICE_ID_N210L), board_n210 }, + { PCI_VDEVICE(MUCSE, RNPGBE_DEVICE_ID_N500_DUAL_PORT), board_n500 }, + { PCI_VDEVICE(MUCSE, RNPGBE_DEVICE_ID_N500_QUAD_PORT), board_n500 }, + /* required last entry */ + {0, }, +}; + +/** + * rnpgbe_probe - Device initialization routine + * @pdev: PCI device information struct + * @id: entry in rnpgbe_pci_tbl + * + * rnpgbe_probe initializes a PF adapter identified by a pci_dev + * structure. 
+ * + * Return: 0 on success, negative errno on failure + **/ +static int rnpgbe_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int err; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(56)); + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting %d\n", err); + goto err_disable_dev; + } + + err = pci_request_mem_regions(pdev, rnpgbe_driver_name); + if (err) { + dev_err(&pdev->dev, + "pci_request_selected_regions failed %d\n", err); + goto err_disable_dev; + } + + pci_set_master(pdev); + err = pci_save_state(pdev); + if (err) { + dev_err(&pdev->dev, "pci_save_state failed %d\n", err); + goto err_free_regions; + } + + return 0; +err_free_regions: + pci_release_mem_regions(pdev); +err_disable_dev: + pci_disable_device(pdev); + return err; +} + +/** + * rnpgbe_remove - Device removal routine + * @pdev: PCI device information struct + * + * rnpgbe_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. This could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. + **/ +static void rnpgbe_remove(struct pci_dev *pdev) +{ + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +} + +/** + * rnpgbe_dev_shutdown - Device shutdown routine + * @pdev: PCI device information struct + **/ +static void rnpgbe_dev_shutdown(struct pci_dev *pdev) +{ + pci_disable_device(pdev); +} + +/** + * rnpgbe_shutdown - Device shutdown routine + * @pdev: PCI device information struct + * + * rnpgbe_shutdown is called by the PCI subsystem to alert the driver + * that os shutdown. Device should setup wakeup state here. 
+ **/ +static void rnpgbe_shutdown(struct pci_dev *pdev) +{ + rnpgbe_dev_shutdown(pdev); +} + +static struct pci_driver rnpgbe_driver = { + .name = rnpgbe_driver_name, + .id_table = rnpgbe_pci_tbl, + .probe = rnpgbe_probe, + .remove = rnpgbe_remove, + .shutdown = rnpgbe_shutdown, +}; + +module_pci_driver(rnpgbe_driver); + +MODULE_DEVICE_TABLE(pci, rnpgbe_pci_tbl); +MODULE_AUTHOR("Yibo Dong, "); +MODULE_DESCRIPTION("Mucse(R) 1 Gigabit PCI Express Network Driver"); +MODULE_LICENSE("GPL"); From 1b7f85f733fd243d7c9073b9ff3d93e6ba5d1055 Mon Sep 17 00:00:00 2001 From: Dong Yibo Date: Sat, 1 Nov 2025 09:38:46 +0800 Subject: [PATCH 556/867] net: rnpgbe: Add n500/n210 chip support with BAR2 mapping Add hardware initialization foundation for MUCSE 1Gbe controller, including: 1. Map PCI BAR2 as hardware register base; 2. Bind PCI device to driver private data (struct mucse) and initialize hardware context (struct mucse_hw); 3. Reserve board-specific init framework via rnpgbe_init_hw. Signed-off-by: Dong Yibo Reviewed-by: Vadim Fedorenko Reviewed-by: MD Danish Anwar Link: https://patch.msgid.link/20251101013849.120565-3-dong100@mucse.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 10 +++ drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h | 8 ++ .../net/ethernet/mucse/rnpgbe/rnpgbe_main.c | 80 +++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h index d3439d28c6544..a121ce4872a64 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h @@ -9,6 +9,16 @@ enum rnpgbe_boards { board_n210 }; +struct mucse_hw { + void __iomem *hw_addr; +}; + +struct mucse { + struct net_device *netdev; + struct pci_dev *pdev; + struct mucse_hw hw; +}; + /* Device IDs */ #define PCI_VENDOR_ID_MUCSE 0x8848 #define RNPGBE_DEVICE_ID_N500_QUAD_PORT 0x8308 diff 
--git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h new file mode 100644 index 0000000000000..3a779806e8be3 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 - 2025 Mucse Corporation. */ + +#ifndef _RNPGBE_HW_H +#define _RNPGBE_HW_H + +#define RNPGBE_MAX_QUEUES 8 +#endif /* _RNPGBE_HW_H */ diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c index 019e819fb497b..305657d73e256 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c @@ -2,8 +2,11 @@ /* Copyright(c) 2020 - 2025 Mucse Corporation. */ #include +#include +#include #include "rnpgbe.h" +#include "rnpgbe_hw.h" static const char rnpgbe_driver_name[] = "rnpgbe"; @@ -21,6 +24,54 @@ static struct pci_device_id rnpgbe_pci_tbl[] = { {0, }, }; +/** + * rnpgbe_add_adapter - Add netdev for this pci_dev + * @pdev: PCI device information structure + * @board_type: board type + * + * rnpgbe_add_adapter initializes a netdev for this pci_dev + * structure. Initializes Bar map, private structure, and a + * hardware reset occur. 
+ * + * Return: 0 on success, negative errno on failure + **/ +static int rnpgbe_add_adapter(struct pci_dev *pdev, + int board_type) +{ + struct net_device *netdev; + void __iomem *hw_addr; + struct mucse *mucse; + struct mucse_hw *hw; + int err; + + netdev = alloc_etherdev_mq(sizeof(struct mucse), RNPGBE_MAX_QUEUES); + if (!netdev) + return -ENOMEM; + + SET_NETDEV_DEV(netdev, &pdev->dev); + mucse = netdev_priv(netdev); + mucse->netdev = netdev; + mucse->pdev = pdev; + pci_set_drvdata(pdev, mucse); + + hw = &mucse->hw; + hw_addr = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + if (!hw_addr) { + err = -EIO; + goto err_free_net; + } + + hw->hw_addr = hw_addr; + + return 0; + +err_free_net: + free_netdev(netdev); + return err; +} + /** * rnpgbe_probe - Device initialization routine * @pdev: PCI device information struct @@ -33,6 +84,7 @@ static struct pci_device_id rnpgbe_pci_tbl[] = { **/ static int rnpgbe_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + int board_type = id->driver_data; int err; err = pci_enable_device_mem(pdev); @@ -60,6 +112,10 @@ static int rnpgbe_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_free_regions; } + err = rnpgbe_add_adapter(pdev, board_type); + if (err) + goto err_free_regions; + return 0; err_free_regions: pci_release_mem_regions(pdev); @@ -68,6 +124,23 @@ static int rnpgbe_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } +/** + * rnpgbe_rm_adapter - Remove netdev for this mucse structure + * @pdev: PCI device information struct + * + * rnpgbe_rm_adapter remove a netdev for this mucse structure + **/ +static void rnpgbe_rm_adapter(struct pci_dev *pdev) +{ + struct mucse *mucse = pci_get_drvdata(pdev); + struct net_device *netdev; + + if (!mucse) + return; + netdev = mucse->netdev; + free_netdev(netdev); +} + /** * rnpgbe_remove - Device removal routine * @pdev: PCI device information struct @@ -79,6 +152,7 @@ static int 
rnpgbe_probe(struct pci_dev *pdev, const struct pci_device_id *id) **/ static void rnpgbe_remove(struct pci_dev *pdev) { + rnpgbe_rm_adapter(pdev); pci_release_mem_regions(pdev); pci_disable_device(pdev); } @@ -89,6 +163,12 @@ static void rnpgbe_remove(struct pci_dev *pdev) **/ static void rnpgbe_dev_shutdown(struct pci_dev *pdev) { + struct mucse *mucse = pci_get_drvdata(pdev); + struct net_device *netdev = mucse->netdev; + + rtnl_lock(); + netif_device_detach(netdev); + rtnl_unlock(); pci_disable_device(pdev); } From 4543534c3ef5115189ee2981e0911d1626af57ed Mon Sep 17 00:00:00 2001 From: Dong Yibo Date: Sat, 1 Nov 2025 09:38:47 +0800 Subject: [PATCH 557/867] net: rnpgbe: Add basic mbx ops support Add fundamental mailbox (MBX) communication operations between PF (Physical Function) and firmware for n500/n210 chips Signed-off-by: Dong Yibo Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251101013849.120565-4-dong100@mucse.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mucse/rnpgbe/Makefile | 4 +- drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 17 + .../net/ethernet/mucse/rnpgbe/rnpgbe_chip.c | 70 +++ drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h | 7 + .../net/ethernet/mucse/rnpgbe/rnpgbe_main.c | 5 + .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c | 405 ++++++++++++++++++ .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h | 20 + 7 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h diff --git a/drivers/net/ethernet/mucse/rnpgbe/Makefile b/drivers/net/ethernet/mucse/rnpgbe/Makefile index 9df536f0d04c2..5fc878ada4b11 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/Makefile +++ b/drivers/net/ethernet/mucse/rnpgbe/Makefile @@ -5,4 +5,6 @@ # obj-$(CONFIG_MGBE) += rnpgbe.o -rnpgbe-objs := rnpgbe_main.o +rnpgbe-objs := rnpgbe_main.o\ + rnpgbe_chip.o\ + rnpgbe_mbx.o diff 
--git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h index a121ce4872a64..4c70b0cedd1ff 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h @@ -4,13 +4,28 @@ #ifndef _RNPGBE_H #define _RNPGBE_H +#include + enum rnpgbe_boards { board_n500, board_n210 }; +struct mucse_mbx_info { + u32 timeout_us; + u32 delay_us; + u16 fw_req; + u16 fw_ack; + /* fw <--> pf mbx */ + u32 fwpf_shm_base; + u32 pf2fw_mbx_ctrl; + u32 fwpf_mbx_mask; + u32 fwpf_ctrl_base; +}; + struct mucse_hw { void __iomem *hw_addr; + struct mucse_mbx_info mbx; }; struct mucse { @@ -19,6 +34,8 @@ struct mucse { struct mucse_hw hw; }; +int rnpgbe_init_hw(struct mucse_hw *hw, int board_type); + /* Device IDs */ #define PCI_VENDOR_ID_MUCSE 0x8848 #define RNPGBE_DEVICE_ID_N500_QUAD_PORT 0x8308 diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c new file mode 100644 index 0000000000000..5739db98f12a1 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 - 2025 Mucse Corporation. 
*/ + +#include + +#include "rnpgbe.h" +#include "rnpgbe_hw.h" +#include "rnpgbe_mbx.h" + +/** + * rnpgbe_init_n500 - Setup n500 hw info + * @hw: hw information structure + * + * rnpgbe_init_n500 initializes all private + * structure for n500 + **/ +static void rnpgbe_init_n500(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + + mbx->fwpf_ctrl_base = MUCSE_N500_FWPF_CTRL_BASE; + mbx->fwpf_shm_base = MUCSE_N500_FWPF_SHM_BASE; +} + +/** + * rnpgbe_init_n210 - Setup n210 hw info + * @hw: hw information structure + * + * rnpgbe_init_n210 initializes all private + * structure for n210 + **/ +static void rnpgbe_init_n210(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + + mbx->fwpf_ctrl_base = MUCSE_N210_FWPF_CTRL_BASE; + mbx->fwpf_shm_base = MUCSE_N210_FWPF_SHM_BASE; +} + +/** + * rnpgbe_init_hw - Setup hw info according to board_type + * @hw: hw information structure + * @board_type: board type + * + * rnpgbe_init_hw initializes all hw data + * + * Return: 0 on success, -EINVAL on failure + **/ +int rnpgbe_init_hw(struct mucse_hw *hw, int board_type) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + + mbx->pf2fw_mbx_ctrl = MUCSE_GBE_PFFW_MBX_CTRL_OFFSET; + mbx->fwpf_mbx_mask = MUCSE_GBE_FWPF_MBX_MASK_OFFSET; + + switch (board_type) { + case board_n500: + rnpgbe_init_n500(hw); + break; + case board_n210: + rnpgbe_init_n210(hw); + break; + default: + return -EINVAL; + } + /* init_params with mbx base */ + mucse_init_mbx_params_pf(hw); + + return 0; +} diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h index 3a779806e8be3..268f572936aa4 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h @@ -4,5 +4,12 @@ #ifndef _RNPGBE_HW_H #define _RNPGBE_HW_H +#define MUCSE_N500_FWPF_CTRL_BASE 0x28b00 +#define MUCSE_N500_FWPF_SHM_BASE 0x2d000 +#define MUCSE_GBE_PFFW_MBX_CTRL_OFFSET 0x5500 +#define MUCSE_GBE_FWPF_MBX_MASK_OFFSET 0x5700 +#define 
MUCSE_N210_FWPF_CTRL_BASE 0x29400 +#define MUCSE_N210_FWPF_SHM_BASE 0x2d900 + #define RNPGBE_MAX_QUEUES 8 #endif /* _RNPGBE_HW_H */ diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c index 305657d73e256..d8aaac79ff4bb 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c @@ -64,6 +64,11 @@ static int rnpgbe_add_adapter(struct pci_dev *pdev, } hw->hw_addr = hw_addr; + err = rnpgbe_init_hw(hw, board_type); + if (err) { + dev_err(&pdev->dev, "Init hw err %d\n", err); + goto err_free_net; + } return 0; diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c new file mode 100644 index 0000000000000..5de4b104455e7 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 - 2025 Mucse Corporation. */ + +#include +#include +#include + +#include "rnpgbe_mbx.h" + +/** + * mbx_data_rd32 - Reads reg with base mbx->fwpf_shm_base + * @mbx: pointer to the MBX structure + * @reg: register offset + * + * Return: register value + **/ +static u32 mbx_data_rd32(struct mucse_mbx_info *mbx, u32 reg) +{ + struct mucse_hw *hw = container_of(mbx, struct mucse_hw, mbx); + + return readl(hw->hw_addr + mbx->fwpf_shm_base + reg); +} + +/** + * mbx_data_wr32 - Writes value to reg with base mbx->fwpf_shm_base + * @mbx: pointer to the MBX structure + * @reg: register offset + * @value: value to be written + * + **/ +static void mbx_data_wr32(struct mucse_mbx_info *mbx, u32 reg, u32 value) +{ + struct mucse_hw *hw = container_of(mbx, struct mucse_hw, mbx); + + writel(value, hw->hw_addr + mbx->fwpf_shm_base + reg); +} + +/** + * mbx_ctrl_rd32 - Reads reg with base mbx->fwpf_ctrl_base + * @mbx: pointer to the MBX structure + * @reg: register offset + * + * Return: register value + **/ +static u32 mbx_ctrl_rd32(struct mucse_mbx_info *mbx, u32 
reg) +{ + struct mucse_hw *hw = container_of(mbx, struct mucse_hw, mbx); + + return readl(hw->hw_addr + mbx->fwpf_ctrl_base + reg); +} + +/** + * mbx_ctrl_wr32 - Writes value to reg with base mbx->fwpf_ctrl_base + * @mbx: pointer to the MBX structure + * @reg: register offset + * @value: value to be written + * + **/ +static void mbx_ctrl_wr32(struct mucse_mbx_info *mbx, u32 reg, u32 value) +{ + struct mucse_hw *hw = container_of(mbx, struct mucse_hw, mbx); + + writel(value, hw->hw_addr + mbx->fwpf_ctrl_base + reg); +} + +/** + * mucse_mbx_get_lock_pf - Write ctrl and read back lock status + * @hw: pointer to the HW structure + * + * Return: register value after write + **/ +static u32 mucse_mbx_get_lock_pf(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u32 reg = MUCSE_MBX_PF2FW_CTRL(mbx); + + mbx_ctrl_wr32(mbx, reg, MUCSE_MBX_PFU); + + return mbx_ctrl_rd32(mbx, reg); +} + +/** + * mucse_obtain_mbx_lock_pf - Obtain mailbox lock + * @hw: pointer to the HW structure + * + * Pair with mucse_release_mbx_lock_pf() + * This function maybe used in an irq handler. 
+ * + * Return: 0 on success, negative errno on failure + **/ +static int mucse_obtain_mbx_lock_pf(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u32 val; + + return read_poll_timeout_atomic(mucse_mbx_get_lock_pf, + val, val & MUCSE_MBX_PFU, + mbx->delay_us, + mbx->timeout_us, + false, hw); +} + +/** + * mucse_release_mbx_lock_pf - Release mailbox lock + * @hw: pointer to the HW structure + * @req: send a request or not + * + * Pair with mucse_obtain_mbx_lock_pf(): + * - Releases the mailbox lock by clearing MUCSE_MBX_PFU bit + * - Simultaneously sends the request by setting MUCSE_MBX_REQ bit + * if req is true + * (Both bits are in the same mailbox control register, + * so operations are combined) + **/ +static void mucse_release_mbx_lock_pf(struct mucse_hw *hw, bool req) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u32 reg = MUCSE_MBX_PF2FW_CTRL(mbx); + + mbx_ctrl_wr32(mbx, reg, req ? MUCSE_MBX_REQ : 0); +} + +/** + * mucse_mbx_get_fwreq - Read fw req from reg + * @mbx: pointer to the mbx structure + * + * Return: the fwreq value + **/ +static u16 mucse_mbx_get_fwreq(struct mucse_mbx_info *mbx) +{ + u32 val = mbx_data_rd32(mbx, MUCSE_MBX_FW2PF_CNT); + + return FIELD_GET(GENMASK_U32(15, 0), val); +} + +/** + * mucse_mbx_inc_pf_ack - Increase ack + * @hw: pointer to the HW structure + * + * mucse_mbx_inc_pf_ack reads pf_ack from hw, then writes + * new value back after increase + **/ +static void mucse_mbx_inc_pf_ack(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u16 ack; + u32 val; + + val = mbx_data_rd32(mbx, MUCSE_MBX_PF2FW_CNT); + ack = FIELD_GET(GENMASK_U32(31, 16), val); + ack++; + val &= ~GENMASK_U32(31, 16); + val |= FIELD_PREP(GENMASK_U32(31, 16), ack); + mbx_data_wr32(mbx, MUCSE_MBX_PF2FW_CNT, val); +} + +/** + * mucse_read_mbx_pf - Read a message from the mailbox + * @hw: pointer to the HW structure + * @msg: the message buffer + * @size: length of buffer + * + * mucse_read_mbx_pf copies a message from the mbx 
buffer to the caller's + * memory buffer. The presumption is that the caller knows that there was + * a message due to a fw request so no polling for message is needed. + * + * Return: 0 on success, negative errno on failure + **/ +static int mucse_read_mbx_pf(struct mucse_hw *hw, u32 *msg, u16 size) +{ + const int size_in_words = size / sizeof(u32); + struct mucse_mbx_info *mbx = &hw->mbx; + int err; + + err = mucse_obtain_mbx_lock_pf(hw); + if (err) + return err; + + for (int i = 0; i < size_in_words; i++) + msg[i] = mbx_data_rd32(mbx, MUCSE_MBX_FWPF_SHM + 4 * i); + /* Hw needs write data_reg at last */ + mbx_data_wr32(mbx, MUCSE_MBX_FWPF_SHM, 0); + /* flush reqs as we have read this request data */ + hw->mbx.fw_req = mucse_mbx_get_fwreq(mbx); + mucse_mbx_inc_pf_ack(hw); + mucse_release_mbx_lock_pf(hw, false); + + return 0; +} + +/** + * mucse_check_for_msg_pf - Check to see if the fw has sent mail + * @hw: pointer to the HW structure + * + * Return: 0 if the fw has set the Status bit or else -EIO + **/ +static int mucse_check_for_msg_pf(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u16 fw_req; + + fw_req = mucse_mbx_get_fwreq(mbx); + /* chip's register is reset to 0 when rc send reset + * mbx command. Return -EIO if in this state, others + * fw == hw->mbx.fw_req means no new msg. 
+ **/ + if (fw_req == 0 || fw_req == hw->mbx.fw_req) + return -EIO; + + return 0; +} + +/** + * mucse_poll_for_msg - Wait for message notification + * @hw: pointer to the HW structure + * + * Return: 0 on success, negative errno on failure + **/ +static int mucse_poll_for_msg(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + int val; + + return read_poll_timeout(mucse_check_for_msg_pf, + val, !val, mbx->delay_us, + mbx->timeout_us, + false, hw); +} + +/** + * mucse_poll_and_read_mbx - Wait for message notification and receive message + * @hw: pointer to the HW structure + * @msg: the message buffer + * @size: length of buffer + * + * Return: 0 if it successfully received a message notification and + * copied it into the receive buffer, negative errno on failure + **/ +int mucse_poll_and_read_mbx(struct mucse_hw *hw, u32 *msg, u16 size) +{ + int err; + + err = mucse_poll_for_msg(hw); + if (err) + return err; + + return mucse_read_mbx_pf(hw, msg, size); +} + +/** + * mucse_mbx_get_fwack - Read fw ack from reg + * @mbx: pointer to the MBX structure + * + * Return: the fwack value + **/ +static u16 mucse_mbx_get_fwack(struct mucse_mbx_info *mbx) +{ + u32 val = mbx_data_rd32(mbx, MUCSE_MBX_FW2PF_CNT); + + return FIELD_GET(GENMASK_U32(31, 16), val); +} + +/** + * mucse_mbx_inc_pf_req - Increase req + * @hw: pointer to the HW structure + * + * mucse_mbx_inc_pf_req reads pf_req from hw, then writes + * new value back after increase + **/ +static void mucse_mbx_inc_pf_req(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u16 req; + u32 val; + + val = mbx_data_rd32(mbx, MUCSE_MBX_PF2FW_CNT); + req = FIELD_GET(GENMASK_U32(15, 0), val); + req++; + val &= ~GENMASK_U32(15, 0); + val |= FIELD_PREP(GENMASK_U32(15, 0), req); + mbx_data_wr32(mbx, MUCSE_MBX_PF2FW_CNT, val); +} + +/** + * mucse_write_mbx_pf - Place a message in the mailbox + * @hw: pointer to the HW structure + * @msg: the message buffer + * @size: length of buffer + * + * Return: 0 
if it successfully copied message into the buffer, + * negative errno on failure + **/ +static int mucse_write_mbx_pf(struct mucse_hw *hw, u32 *msg, u16 size) +{ + const int size_in_words = size / sizeof(u32); + struct mucse_mbx_info *mbx = &hw->mbx; + int err; + + err = mucse_obtain_mbx_lock_pf(hw); + if (err) + return err; + + for (int i = 0; i < size_in_words; i++) + mbx_data_wr32(mbx, MUCSE_MBX_FWPF_SHM + i * 4, msg[i]); + + /* flush acks as we are overwriting the message buffer */ + hw->mbx.fw_ack = mucse_mbx_get_fwack(mbx); + mucse_mbx_inc_pf_req(hw); + mucse_release_mbx_lock_pf(hw, true); + + return 0; +} + +/** + * mucse_check_for_ack_pf - Check to see if the fw has ACKed + * @hw: pointer to the HW structure + * + * Return: 0 if the fw has set the Status bit or else -EIO + **/ +static int mucse_check_for_ack_pf(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u16 fw_ack; + + fw_ack = mucse_mbx_get_fwack(mbx); + /* chip's register is reset to 0 when rc send reset + * mbx command. Return -EIO if in this state, others + * fw_ack == hw->mbx.fw_ack means no new ack. 
+ **/ + if (fw_ack == 0 || fw_ack == hw->mbx.fw_ack) + return -EIO; + + return 0; +} + +/** + * mucse_poll_for_ack - Wait for message acknowledgment + * @hw: pointer to the HW structure + * + * Return: 0 if it successfully received a message acknowledgment, + * else negative errno + **/ +static int mucse_poll_for_ack(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + int val; + + return read_poll_timeout(mucse_check_for_ack_pf, + val, !val, mbx->delay_us, + mbx->timeout_us, + false, hw); +} + +/** + * mucse_write_and_wait_ack_mbx - Write a message to the mailbox, wait for ack + * @hw: pointer to the HW structure + * @msg: the message buffer + * @size: length of buffer + * + * Return: 0 if it successfully copied message into the buffer and + * received an ack within mbx->timeout_us, negative errno on failure + **/ +int mucse_write_and_wait_ack_mbx(struct mucse_hw *hw, u32 *msg, u16 size) +{ + int err; + + err = mucse_write_mbx_pf(hw, msg, size); + if (err) + return err; + + return mucse_poll_for_ack(hw); +} + +/** + * mucse_mbx_reset - Reset mbx info, sync info from regs + * @hw: pointer to the HW structure + * + * mucse_mbx_reset resets all mbx variables to default.
+ **/ +static void mucse_mbx_reset(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + u32 val; + + val = mbx_data_rd32(mbx, MUCSE_MBX_FW2PF_CNT); + hw->mbx.fw_req = FIELD_GET(GENMASK_U32(15, 0), val); + hw->mbx.fw_ack = FIELD_GET(GENMASK_U32(31, 16), val); + mbx_ctrl_wr32(mbx, MUCSE_MBX_PF2FW_CTRL(mbx), 0); + mbx_ctrl_wr32(mbx, MUCSE_MBX_FWPF_MASK(mbx), GENMASK_U32(31, 16)); +} + +/** + * mucse_init_mbx_params_pf - Set initial values for pf mailbox + * @hw: pointer to the HW structure + * + * Initializes the hw->mbx struct to correct values for pf mailbox + */ +void mucse_init_mbx_params_pf(struct mucse_hw *hw) +{ + struct mucse_mbx_info *mbx = &hw->mbx; + + mbx->delay_us = 100; + mbx->timeout_us = 4 * USEC_PER_SEC; + mucse_mbx_reset(hw); +} diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h new file mode 100644 index 0000000000000..e6fcc8d1d3ca7 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 - 2025 Mucse Corporation. 
*/ + +#ifndef _RNPGBE_MBX_H +#define _RNPGBE_MBX_H + +#include "rnpgbe.h" + +#define MUCSE_MBX_FW2PF_CNT 0 +#define MUCSE_MBX_PF2FW_CNT 4 +#define MUCSE_MBX_FWPF_SHM 8 +#define MUCSE_MBX_PF2FW_CTRL(mbx) ((mbx)->pf2fw_mbx_ctrl) +#define MUCSE_MBX_FWPF_MASK(mbx) ((mbx)->fwpf_mbx_mask) +#define MUCSE_MBX_REQ BIT(0) /* Request a req to mailbox */ +#define MUCSE_MBX_PFU BIT(3) /* PF owns the mailbox buffer */ + +int mucse_write_and_wait_ack_mbx(struct mucse_hw *hw, u32 *msg, u16 size); +void mucse_init_mbx_params_pf(struct mucse_hw *hw); +int mucse_poll_and_read_mbx(struct mucse_hw *hw, u32 *msg, u16 size); +#endif /* _RNPGBE_MBX_H */ From c6d3f0198eaa4efba506933d2dbcd417e987630a Mon Sep 17 00:00:00 2001 From: Dong Yibo Date: Sat, 1 Nov 2025 09:38:48 +0800 Subject: [PATCH 558/867] net: rnpgbe: Add basic mbx_fw support Add fundamental firmware (FW) communication operations via PF-FW mailbox, including: - FW sync (via HW info query with retries) - HW reset (post FW command to reset hardware) - MAC address retrieval (request FW for port-specific MAC) - Power management (powerup/powerdown notification to FW) Signed-off-by: Dong Yibo Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251101013849.120565-5-dong100@mucse.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mucse/rnpgbe/Makefile | 3 +- drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 4 + .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c | 1 + .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c | 191 ++++++++++++++++++ .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h | 88 ++++++++ 5 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c create mode 100644 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h diff --git a/drivers/net/ethernet/mucse/rnpgbe/Makefile b/drivers/net/ethernet/mucse/rnpgbe/Makefile index 5fc878ada4b11..de8bcb7772ab4 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/Makefile +++ b/drivers/net/ethernet/mucse/rnpgbe/Makefile @@ -7,4 +7,5 
@@ obj-$(CONFIG_MGBE) += rnpgbe.o rnpgbe-objs := rnpgbe_main.o\ rnpgbe_chip.o\ - rnpgbe_mbx.o + rnpgbe_mbx.o\ + rnpgbe_mbx_fw.o diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h index 4c70b0cedd1ff..37bd9278beaa0 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h @@ -5,6 +5,7 @@ #define _RNPGBE_H #include +#include enum rnpgbe_boards { board_n500, @@ -16,6 +17,8 @@ struct mucse_mbx_info { u32 delay_us; u16 fw_req; u16 fw_ack; + /* lock for only one use mbx */ + struct mutex lock; /* fw <--> pf mbx */ u32 fwpf_shm_base; u32 pf2fw_mbx_ctrl; @@ -26,6 +29,7 @@ struct mucse_mbx_info { struct mucse_hw { void __iomem *hw_addr; struct mucse_mbx_info mbx; + u8 pfvfnum; }; struct mucse { diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c index 5de4b104455e7..de5e29230b3c8 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c @@ -401,5 +401,6 @@ void mucse_init_mbx_params_pf(struct mucse_hw *hw) mbx->delay_us = 100; mbx->timeout_us = 4 * USEC_PER_SEC; + mutex_init(&mbx->lock); mucse_mbx_reset(hw); } diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c new file mode 100644 index 0000000000000..8c8bd5e8e1db1 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 - 2025 Mucse Corporation. */ + +#include +#include + +#include "rnpgbe.h" +#include "rnpgbe_mbx.h" +#include "rnpgbe_mbx_fw.h" + +/** + * mucse_fw_send_cmd_wait_resp - Send cmd req and wait for response + * @hw: pointer to the HW structure + * @req: pointer to the cmd req structure + * @reply: pointer to the fw reply structure + * + * mucse_fw_send_cmd_wait_resp sends req to pf-fw mailbox and wait + * reply from fw. 
+ * + * Return: 0 on success, negative errno on failure + **/ +static int mucse_fw_send_cmd_wait_resp(struct mucse_hw *hw, + struct mbx_fw_cmd_req *req, + struct mbx_fw_cmd_reply *reply) +{ + int len = le16_to_cpu(req->datalen); + int retry_cnt = 3; + int err; + + mutex_lock(&hw->mbx.lock); + err = mucse_write_and_wait_ack_mbx(hw, (u32 *)req, len); + if (err) + goto out; + do { + err = mucse_poll_and_read_mbx(hw, (u32 *)reply, + sizeof(*reply)); + if (err) + goto out; + /* mucse_write_and_wait_ack_mbx return 0 means fw has + * received request, wait for the expect opcode + * reply with 'retry_cnt' times. + */ + } while (--retry_cnt >= 0 && reply->opcode != req->opcode); +out: + mutex_unlock(&hw->mbx.lock); + if (!err && retry_cnt < 0) + return -ETIMEDOUT; + if (!err && reply->error_code) + return -EIO; + + return err; +} + +/** + * mucse_mbx_get_info - Get hw info from fw + * @hw: pointer to the HW structure + * + * mucse_mbx_get_info tries to get hw info from hw. + * + * Return: 0 on success, negative errno on failure + **/ +static int mucse_mbx_get_info(struct mucse_hw *hw) +{ + struct mbx_fw_cmd_req req = { + .datalen = cpu_to_le16(MUCSE_MBX_REQ_HDR_LEN), + .opcode = cpu_to_le16(GET_HW_INFO), + }; + struct mbx_fw_cmd_reply reply = {}; + int err; + + err = mucse_fw_send_cmd_wait_resp(hw, &req, &reply); + if (!err) + hw->pfvfnum = FIELD_GET(GENMASK_U16(7, 0), + le16_to_cpu(reply.hw_info.pfnum)); + + return err; +} + +/** + * mucse_mbx_sync_fw - Try to sync with fw + * @hw: pointer to the HW structure + * + * mucse_mbx_sync_fw tries to sync with fw. It is only called in + * probe. Nothing (register network) todo if failed. + * Try more times to do sync. 
+ * + * Return: 0 on success, negative errno on failure + **/ +int mucse_mbx_sync_fw(struct mucse_hw *hw) +{ + int try_cnt = 3; + int err; + + do { + err = mucse_mbx_get_info(hw); + } while (err == -ETIMEDOUT && try_cnt--); + + return err; +} + +/** + * mucse_mbx_powerup - Echo fw to powerup + * @hw: pointer to the HW structure + * @is_powerup: true for powerup, false for powerdown + * + * mucse_mbx_powerup echo fw to change working frequency + * to normal after received true, and reduce working frequency + * if false. + * + * Return: 0 on success, negative errno on failure + **/ +int mucse_mbx_powerup(struct mucse_hw *hw, bool is_powerup) +{ + struct mbx_fw_cmd_req req = { + .datalen = cpu_to_le16(sizeof(req.powerup) + + MUCSE_MBX_REQ_HDR_LEN), + .opcode = cpu_to_le16(POWER_UP), + .powerup = { + /* fw needs this to reply correct cmd */ + .version = cpu_to_le32(GENMASK_U32(31, 0)), + .status = cpu_to_le32(is_powerup ? 1 : 0), + }, + }; + int len, err; + + len = le16_to_cpu(req.datalen); + mutex_lock(&hw->mbx.lock); + err = mucse_write_and_wait_ack_mbx(hw, (u32 *)&req, len); + mutex_unlock(&hw->mbx.lock); + + return err; +} + +/** + * mucse_mbx_reset_hw - Posts a mbx req to reset hw + * @hw: pointer to the HW structure + * + * mucse_mbx_reset_hw posts a mbx req to firmware to reset hw. + * We use mucse_fw_send_cmd_wait_resp to wait hw reset ok. + * + * Return: 0 on success, negative errno on failure + **/ +int mucse_mbx_reset_hw(struct mucse_hw *hw) +{ + struct mbx_fw_cmd_req req = { + .datalen = cpu_to_le16(MUCSE_MBX_REQ_HDR_LEN), + .opcode = cpu_to_le16(RESET_HW), + }; + struct mbx_fw_cmd_reply reply = {}; + + return mucse_fw_send_cmd_wait_resp(hw, &req, &reply); +} + +/** + * mucse_mbx_get_macaddr - Posts a mbx req to request macaddr + * @hw: pointer to the HW structure + * @pfvfnum: index of pf/vf num + * @mac_addr: pointer to store mac_addr + * @port: port index + * + * mucse_mbx_get_macaddr posts a mbx req to firmware to get mac_addr. 
+ * + * Return: 0 on success, negative errno on failure + **/ +int mucse_mbx_get_macaddr(struct mucse_hw *hw, int pfvfnum, + u8 *mac_addr, + int port) +{ + struct mbx_fw_cmd_req req = { + .datalen = cpu_to_le16(sizeof(req.get_mac_addr) + + MUCSE_MBX_REQ_HDR_LEN), + .opcode = cpu_to_le16(GET_MAC_ADDRESS), + .get_mac_addr = { + .port_mask = cpu_to_le32(BIT(port)), + .pfvf_num = cpu_to_le32(pfvfnum), + }, + }; + struct mbx_fw_cmd_reply reply = {}; + int err; + + err = mucse_fw_send_cmd_wait_resp(hw, &req, &reply); + if (err) + return err; + + if (le32_to_cpu(reply.mac_addr.ports) & BIT(port)) + memcpy(mac_addr, reply.mac_addr.addrs[port].mac, ETH_ALEN); + else + return -ENODATA; + + return 0; +} diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h new file mode 100644 index 0000000000000..fb24fc12b6139 --- /dev/null +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 - 2025 Mucse Corporation. 
*/ + +#ifndef _RNPGBE_MBX_FW_H +#define _RNPGBE_MBX_FW_H + +#include + +#include "rnpgbe.h" + +#define MUCSE_MBX_REQ_HDR_LEN 24 + +enum MUCSE_FW_CMD { + GET_HW_INFO = 0x0601, + GET_MAC_ADDRESS = 0x0602, + RESET_HW = 0x0603, + POWER_UP = 0x0803, +}; + +struct mucse_hw_info { + u8 link_stat; + u8 port_mask; + __le32 speed; + __le16 phy_type; + __le16 nic_mode; + __le16 pfnum; + __le32 fw_version; + __le32 axi_mhz; + union { + u8 port_id[4]; + __le32 port_ids; + }; + __le32 bd_uid; + __le32 phy_id; + __le32 wol_status; + __le32 ext_info; +} __packed; + +struct mbx_fw_cmd_req { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 ret_value; + __le32 cookie_lo; + __le32 cookie_hi; + __le32 reply_lo; + __le32 reply_hi; + union { + u8 data[32]; + struct { + __le32 version; + __le32 status; + } powerup; + struct { + __le32 port_mask; + __le32 pfvf_num; + } get_mac_addr; + }; +} __packed; + +struct mbx_fw_cmd_reply { + __le16 flags; + __le16 opcode; + __le16 error_code; + __le16 datalen; + __le32 cookie_lo; + __le32 cookie_hi; + union { + u8 data[40]; + struct mac_addr { + __le32 ports; + struct _addr { + /* for macaddr:01:02:03:04:05:06 + * mac-hi=0x01020304 mac-lo=0x05060000 + */ + u8 mac[8]; + } addrs[4]; + } mac_addr; + struct mucse_hw_info hw_info; + }; +} __packed; + +int mucse_mbx_sync_fw(struct mucse_hw *hw); +int mucse_mbx_powerup(struct mucse_hw *hw, bool is_powerup); +int mucse_mbx_reset_hw(struct mucse_hw *hw); +int mucse_mbx_get_macaddr(struct mucse_hw *hw, int pfvfnum, + u8 *mac_addr, int port); +#endif /* _RNPGBE_MBX_FW_H */ From 2ee95ec17e97c58b65e978a08b75fa8cb6424e4e Mon Sep 17 00:00:00 2001 From: Dong Yibo Date: Sat, 1 Nov 2025 09:38:49 +0800 Subject: [PATCH 559/867] net: rnpgbe: Add register_netdev Complete the network device (netdev) registration flow for Mucse Gbe Ethernet chips, including: 1. Hardware state initialization: - Send powerup notification to firmware (via echo_fw_status) - Sync with firmware - Reset hardware 2. 
MAC address handling: - Retrieve permanent MAC from firmware (via mucse_mbx_get_macaddr) - Fallback to random valid MAC (eth_random_addr) if not valid mac from Fw Signed-off-by: Dong Yibo Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251101013849.120565-6-dong100@mucse.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h | 22 ++++ .../net/ethernet/mucse/rnpgbe/rnpgbe_chip.c | 73 +++++++++++ drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h | 2 + .../net/ethernet/mucse/rnpgbe/rnpgbe_main.c | 119 +++++++++++++++++- 4 files changed, 214 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h index 37bd9278beaa0..5b024f9f7e17a 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe.h @@ -26,18 +26,37 @@ struct mucse_mbx_info { u32 fwpf_ctrl_base; }; +/* Enum for firmware notification modes, + * more modes (e.g., portup, link_report) will be added in future + **/ +enum { + mucse_fw_powerup, +}; + struct mucse_hw { void __iomem *hw_addr; + struct pci_dev *pdev; struct mucse_mbx_info mbx; + int port; u8 pfvfnum; }; +struct mucse_stats { + u64 tx_dropped; +}; + struct mucse { struct net_device *netdev; struct pci_dev *pdev; struct mucse_hw hw; + struct mucse_stats stats; }; +int rnpgbe_get_permanent_mac(struct mucse_hw *hw, u8 *perm_addr); +int rnpgbe_reset_hw(struct mucse_hw *hw); +int rnpgbe_send_notify(struct mucse_hw *hw, + bool enable, + int mode); int rnpgbe_init_hw(struct mucse_hw *hw, int board_type); /* Device IDs */ @@ -46,4 +65,7 @@ int rnpgbe_init_hw(struct mucse_hw *hw, int board_type); #define RNPGBE_DEVICE_ID_N500_DUAL_PORT 0x8318 #define RNPGBE_DEVICE_ID_N210 0x8208 #define RNPGBE_DEVICE_ID_N210L 0x820a + +#define mucse_hw_wr32(hw, reg, val) \ + writel((val), (hw)->hw_addr + (reg)) #endif /* _RNPGBE_H */ diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c 
b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c index 5739db98f12a1..ebc7b3750157b 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_chip.c @@ -1,11 +1,82 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2020 - 2025 Mucse Corporation. */ +#include #include +#include #include "rnpgbe.h" #include "rnpgbe_hw.h" #include "rnpgbe_mbx.h" +#include "rnpgbe_mbx_fw.h" + +/** + * rnpgbe_get_permanent_mac - Get permanent mac + * @hw: hw information structure + * @perm_addr: pointer to store perm_addr + * + * rnpgbe_get_permanent_mac tries to get mac from hw + * + * Return: 0 on success, negative errno on failure + **/ +int rnpgbe_get_permanent_mac(struct mucse_hw *hw, u8 *perm_addr) +{ + struct device *dev = &hw->pdev->dev; + int err; + + err = mucse_mbx_get_macaddr(hw, hw->pfvfnum, perm_addr, hw->port); + if (err) { + dev_err(dev, "Failed to get MAC from FW %d\n", err); + return err; + } + + if (!is_valid_ether_addr(perm_addr)) { + dev_err(dev, "Failed to get valid MAC from FW\n"); + return -EINVAL; + } + + return 0; +} + +/** + * rnpgbe_reset_hw - Do a hardware reset + * @hw: hw information structure + * + * rnpgbe_reset_hw calls fw to do a hardware + * reset, and cleans some regs to default. 
+ * + * Return: 0 on success, negative errno on failure + **/ +int rnpgbe_reset_hw(struct mucse_hw *hw) +{ + mucse_hw_wr32(hw, RNPGBE_DMA_AXI_EN, 0); + return mucse_mbx_reset_hw(hw); +} + +/** + * rnpgbe_send_notify - Echo fw status + * @hw: hw information structure + * @enable: true or false status + * @mode: status mode + * + * Return: 0 on success, negative errno on failure + **/ +int rnpgbe_send_notify(struct mucse_hw *hw, + bool enable, + int mode) +{ + int err; + /* Keep switch struct to support more modes in the future */ + switch (mode) { + case mucse_fw_powerup: + err = mucse_mbx_powerup(hw, enable); + break; + default: + err = -EINVAL; + } + + return err; +} /** * rnpgbe_init_n500 - Setup n500 hw info @@ -50,6 +121,8 @@ int rnpgbe_init_hw(struct mucse_hw *hw, int board_type) { struct mucse_mbx_info *mbx = &hw->mbx; + hw->port = 0; + mbx->pf2fw_mbx_ctrl = MUCSE_GBE_PFFW_MBX_CTRL_OFFSET; mbx->fwpf_mbx_mask = MUCSE_GBE_FWPF_MBX_MASK_OFFSET; diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h index 268f572936aa4..e77e6bc3d3e30 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_hw.h @@ -11,5 +11,7 @@ #define MUCSE_N210_FWPF_CTRL_BASE 0x29400 #define MUCSE_N210_FWPF_SHM_BASE 0x2d900 +#define RNPGBE_DMA_AXI_EN 0x0010 + #define RNPGBE_MAX_QUEUES 8 #endif /* _RNPGBE_HW_H */ diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c index d8aaac79ff4bb..316f941629d4a 100644 --- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c +++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_main.c @@ -7,6 +7,7 @@ #include "rnpgbe.h" #include "rnpgbe_hw.h" +#include "rnpgbe_mbx_fw.h" static const char rnpgbe_driver_name[] = "rnpgbe"; @@ -24,6 +25,58 @@ static struct pci_device_id rnpgbe_pci_tbl[] = { {0, }, }; +/** + * rnpgbe_open - Called when a network interface is made active + * @netdev: network interface device 
structure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). + * + * Return: 0 + **/ +static int rnpgbe_open(struct net_device *netdev) +{ + return 0; +} + +/** + * rnpgbe_close - Disables a network interface + * @netdev: network interface device structure + * + * The close entry point is called when an interface is de-activated + * by the OS. + * + * Return: 0, this is not allowed to fail + **/ +static int rnpgbe_close(struct net_device *netdev) +{ + return 0; +} + +/** + * rnpgbe_xmit_frame - Send a skb to driver + * @skb: skb structure to be sent + * @netdev: network interface device structure + * + * Return: NETDEV_TX_OK + **/ +static netdev_tx_t rnpgbe_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) +{ + struct mucse *mucse = netdev_priv(netdev); + + dev_kfree_skb_any(skb); + mucse->stats.tx_dropped++; + + return NETDEV_TX_OK; +} + +static const struct net_device_ops rnpgbe_netdev_ops = { + .ndo_open = rnpgbe_open, + .ndo_stop = rnpgbe_close, + .ndo_start_xmit = rnpgbe_xmit_frame, +}; + /** * rnpgbe_add_adapter - Add netdev for this pci_dev * @pdev: PCI device information structure @@ -39,10 +92,11 @@ static int rnpgbe_add_adapter(struct pci_dev *pdev, int board_type) { struct net_device *netdev; + u8 perm_addr[ETH_ALEN]; void __iomem *hw_addr; struct mucse *mucse; struct mucse_hw *hw; - int err; + int err, err_notify; netdev = alloc_etherdev_mq(sizeof(struct mucse), RNPGBE_MAX_QUEUES); if (!netdev) @@ -64,14 +118,67 @@ static int rnpgbe_add_adapter(struct pci_dev *pdev, } hw->hw_addr = hw_addr; + hw->pdev = pdev; + err = rnpgbe_init_hw(hw, board_type); if (err) { dev_err(&pdev->dev, "Init hw err %d\n", err); goto err_free_net; } + /* Step 1: Send power-up notification to firmware (no response expected) + * This informs firmware to initialize hardware power state, but + * firmware only acknowledges receipt without returning data. 
Must be + * done before synchronization as firmware may be in low-power idle + * state initially. + */ + err_notify = rnpgbe_send_notify(hw, true, mucse_fw_powerup); + if (err_notify) { + dev_warn(&pdev->dev, "Send powerup to hw failed %d\n", + err_notify); + dev_warn(&pdev->dev, "Maybe low performance\n"); + } + /* Step 2: Synchronize mailbox communication with firmware (requires + * response) After power-up, confirm firmware is ready to process + * requests with responses. This ensures subsequent request/response + * interactions work reliably. + */ + err = mucse_mbx_sync_fw(hw); + if (err) { + dev_err(&pdev->dev, "Sync fw failed! %d\n", err); + goto err_powerdown; + } - return 0; + netdev->netdev_ops = &rnpgbe_netdev_ops; + err = rnpgbe_reset_hw(hw); + if (err) { + dev_err(&pdev->dev, "Hw reset failed %d\n", err); + goto err_powerdown; + } + + err = rnpgbe_get_permanent_mac(hw, perm_addr); + if (!err) { + eth_hw_addr_set(netdev, perm_addr); + } else if (err == -EINVAL) { + dev_warn(&pdev->dev, "Using random MAC\n"); + eth_hw_addr_random(netdev); + } else if (err) { + dev_err(&pdev->dev, "get perm_addr failed %d\n", err); + goto err_powerdown; + } + + err = register_netdev(netdev); + if (err) + goto err_powerdown; + return 0; +err_powerdown: + /* notify powerdown only powerup ok */ + if (!err_notify) { + err_notify = rnpgbe_send_notify(hw, false, mucse_fw_powerup); + if (err_notify) + dev_warn(&pdev->dev, "Send powerdown to hw failed %d\n", + err_notify); + } err_free_net: free_netdev(netdev); return err; @@ -138,11 +245,17 @@ static int rnpgbe_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void rnpgbe_rm_adapter(struct pci_dev *pdev) { struct mucse *mucse = pci_get_drvdata(pdev); + struct mucse_hw *hw = &mucse->hw; struct net_device *netdev; + int err; if (!mucse) return; netdev = mucse->netdev; + unregister_netdev(netdev); + err = rnpgbe_send_notify(hw, false, mucse_fw_powerup); + if (err) + dev_warn(&pdev->dev, "Send powerdown to hw failed 
%d\n", err); free_netdev(netdev); } @@ -173,6 +286,8 @@ static void rnpgbe_dev_shutdown(struct pci_dev *pdev) rtnl_lock(); netif_device_detach(netdev); + if (netif_running(netdev)) + rnpgbe_close(netdev); rtnl_unlock(); pci_disable_device(pdev); } From 617a0dd24ef2b4e6240df48b1fbac1c3ebfa9282 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Nov 2025 23:26:49 +0100 Subject: [PATCH 560/867] net: phy: make phy_device members pause and asym_pause bitfield bits We can reduce the size of struct phy_device a little by switching the type of members pause and asym_pause from int to a single bit. As C99 is supported now, we can use type bool for the bitfield members, what provides us with the benefit of the usual implicit bool conversions. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/764e9a31-b40b-4dc9-b808-118192a16d87@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 20 ++++++++++---------- drivers/net/phy/phy_device.c | 16 ++++++++-------- include/linux/phy.h | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 61670be0f0957..1a7b32be4625c 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -485,8 +485,8 @@ static int genphy_c45_baset1_read_lpa(struct phy_device *phydev) mii_t1_adv_l_mod_linkmode_t(phydev->lp_advertising, 0); mii_t1_adv_m_mod_linkmode_t(phydev->lp_advertising, 0); - phydev->pause = 0; - phydev->asym_pause = 0; + phydev->pause = false; + phydev->asym_pause = false; return 0; } @@ -498,8 +498,8 @@ static int genphy_c45_baset1_read_lpa(struct phy_device *phydev) return val; mii_t1_adv_l_mod_linkmode_t(phydev->lp_advertising, val); - phydev->pause = val & MDIO_AN_T1_ADV_L_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = val & MDIO_AN_T1_ADV_L_PAUSE_ASYM ? 
1 : 0; + phydev->pause = val & MDIO_AN_T1_ADV_L_PAUSE_CAP; + phydev->asym_pause = val & MDIO_AN_T1_ADV_L_PAUSE_ASYM; val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_LP_M); if (val < 0) @@ -536,8 +536,8 @@ int genphy_c45_read_lpa(struct phy_device *phydev) phydev->lp_advertising); mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, 0); mii_adv_mod_linkmode_adv_t(phydev->lp_advertising, 0); - phydev->pause = 0; - phydev->asym_pause = 0; + phydev->pause = false; + phydev->asym_pause = false; return 0; } @@ -551,8 +551,8 @@ int genphy_c45_read_lpa(struct phy_device *phydev) return val; mii_adv_mod_linkmode_adv_t(phydev->lp_advertising, val); - phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0; + phydev->pause = val & LPA_PAUSE_CAP; + phydev->asym_pause = val & LPA_PAUSE_ASYM; /* Read the link partner's 10G advertisement */ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT); @@ -1171,8 +1171,8 @@ int genphy_c45_read_status(struct phy_device *phydev) phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; - phydev->pause = 0; - phydev->asym_pause = 0; + phydev->pause = false; + phydev->asym_pause = false; if (phydev->autoneg == AUTONEG_ENABLE) { ret = genphy_c45_read_lpa(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 737747cf19069..81984d4ebb7cb 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -825,8 +825,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, dev->speed = SPEED_UNKNOWN; dev->duplex = DUPLEX_UNKNOWN; - dev->pause = 0; - dev->asym_pause = 0; + dev->pause = false; + dev->asym_pause = false; dev->link = 0; dev->port = PORT_TP; dev->interface = PHY_INTERFACE_MODE_GMII; @@ -2092,8 +2092,8 @@ int genphy_setup_forced(struct phy_device *phydev) { u16 ctl; - phydev->pause = 0; - phydev->asym_pause = 0; + phydev->pause = false; + phydev->asym_pause = false; ctl = 
mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); @@ -2500,8 +2500,8 @@ int genphy_read_status(struct phy_device *phydev) phydev->master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED; phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; - phydev->pause = 0; - phydev->asym_pause = 0; + phydev->pause = false; + phydev->asym_pause = false; if (phydev->is_gigabit_capable) { err = genphy_read_master_slave(phydev); @@ -2554,8 +2554,8 @@ int genphy_c37_read_status(struct phy_device *phydev, bool *changed) /* Signal link has changed */ *changed = true; phydev->duplex = DUPLEX_UNKNOWN; - phydev->pause = 0; - phydev->asym_pause = 0; + phydev->pause = false; + phydev->asym_pause = false; if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { lpa = phy_read(phydev, MII_LPA); diff --git a/include/linux/phy.h b/include/linux/phy.h index e3474f03cbc1e..d145a200ea211 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -666,6 +666,8 @@ struct phy_device { /* The most recently read link state */ unsigned link:1; unsigned autoneg_complete:1; + bool pause:1; + bool asym_pause:1; /* Interrupts are enabled */ unsigned interrupts:1; @@ -690,8 +692,6 @@ struct phy_device { int speed; int duplex; int port; - int pause; - int asym_pause; u8 master_slave_get; u8 master_slave_set; u8 master_slave_state; From 6874520518868a660bda182da060b69372265a16 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 3 Nov 2025 11:49:24 +0100 Subject: [PATCH 561/867] net: altera-tse: Set platform drvdata before registering netdev We don't have to wait until netdev is registered before setting it as the pdev's drvdata. Move it at netdev alloc time. 
Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20251103104928.58461-2-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 3f6204de9e6b8..6ba1249f027df 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1150,6 +1150,7 @@ static int altera_tse_probe(struct platform_device *pdev) } SET_NETDEV_DEV(ndev, &pdev->dev); + platform_set_drvdata(pdev, ndev); priv = netdev_priv(ndev); priv->device = &pdev->dev; @@ -1394,8 +1395,6 @@ static int altera_tse_probe(struct platform_device *pdev) goto err_register_netdev; } - platform_set_drvdata(pdev, ndev); - priv->revision = ioread32(&priv->mac_dev->megacore_revision); if (netif_msg_probe(priv)) From dd2619d38d7e97d17995e1156293be1ec08b058d Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 3 Nov 2025 11:49:25 +0100 Subject: [PATCH 562/867] net: altera-tse: Warn on bad revision at probe time Instead of reading the core revision at probe time, and printing a warning for an unexpected version at .ndo_open() time, let's print that warning directly in .probe(). This allows getting rid of the "revision" private field, and also prevents a potential race between reading the revision in .probe() after netdev registration, and accessing that revision in .ndo_open(). By printing the warning after register_netdev(), we are sure that we have a netdev name, and that we try to print the revision after having read it from the internal registers.
Suggested-by: Andrew Lunn Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251103104928.58461-3-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse.h | 3 --- drivers/net/ethernet/altera/altera_tse_main.c | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h index 82f2363a45cd0..e5a56bb989daf 100644 --- a/drivers/net/ethernet/altera/altera_tse.h +++ b/drivers/net/ethernet/altera/altera_tse.h @@ -401,9 +401,6 @@ struct altera_tse_private { /* MAC address space */ struct altera_tse_mac __iomem *mac_dev; - /* TSE Revision */ - u32 revision; - /* mSGDMA Rx Dispatcher address space */ void __iomem *rx_dma_csr; void __iomem *rx_dma_desc; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 6ba1249f027df..343c78a493a1b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -892,9 +892,6 @@ static int tse_open(struct net_device *dev) netdev_warn(dev, "device MAC address %pM\n", dev->dev_addr); - if ((priv->revision < 0xd00) || (priv->revision > 0xe00)) - netdev_warn(dev, "TSE revision %x\n", priv->revision); - spin_lock(&priv->mac_cfg_lock); ret = reset_mac(priv); @@ -1142,6 +1139,7 @@ static int altera_tse_probe(struct platform_device *pdev) struct net_device *ndev; void __iomem *descmap; int ret = -ENODEV; + u32 revision; ndev = alloc_etherdev(sizeof(struct altera_tse_private)); if (!ndev) { @@ -1395,12 +1393,14 @@ static int altera_tse_probe(struct platform_device *pdev) goto err_register_netdev; } - priv->revision = ioread32(&priv->mac_dev->megacore_revision); + revision = ioread32(&priv->mac_dev->megacore_revision); + + if (revision < 0xd00 || revision > 0xe00) + netdev_warn(ndev, "TSE revision %x\n", revision); if (netif_msg_probe(priv)) 
dev_info(&pdev->dev, "Altera TSE MAC version %d.%d at 0x%08lx irq %d/%d\n", - (priv->revision >> 8) & 0xff, - priv->revision & 0xff, + (revision >> 8) & 0xff, revision & 0xff, (unsigned long) control_port->start, priv->rx_irq, priv->tx_irq); From 9350ea63fec6f0cd713b6e90bd60cc2ee433b14f Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 3 Nov 2025 11:49:26 +0100 Subject: [PATCH 563/867] net: altera-tse: Don't use netdev name for the PCS mdio bus The PCS mdio bus must be created before registering the net_device. To do that, we mustn't depend on the netdev name to create the mdio bus name. Let's use the device's name instead. Note that this changes the bus name in /sys/bus/mdiobus Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20251103104928.58461-4-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 343c78a493a1b..003df89709984 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1404,7 +1404,7 @@ static int altera_tse_probe(struct platform_device *pdev) (unsigned long) control_port->start, priv->rx_irq, priv->tx_irq); - snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); + snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", dev_name(&pdev->dev)); pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); if (IS_ERR(pcs_bus)) { ret = PTR_ERR(pcs_bus); From 055e554b8fff7bb47e026bb8d199b213756e4321 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 3 Nov 2025 11:49:27 +0100 Subject: [PATCH 564/867] net: altera-tse: Init PCS and phylink before registering netdev register_netdev() must be done only once all resources are ready, as they may be used in .ndo_open() immediately upon registration.
Move the lynx PCS and phylink initialisation before registering the netdevice. We also remove the call to netif_carrier_off(), as phylink takes care of that. Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20251103104928.58461-5-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse_main.c | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 003df89709984..ca55c5fd11dfd 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1386,24 +1386,6 @@ static int altera_tse_probe(struct platform_device *pdev) spin_lock_init(&priv->tx_lock); spin_lock_init(&priv->rxdma_irq_lock); - netif_carrier_off(ndev); - ret = register_netdev(ndev); - if (ret) { - dev_err(&pdev->dev, "failed to register TSE net device\n"); - goto err_register_netdev; - } - - revision = ioread32(&priv->mac_dev->megacore_revision); - - if (revision < 0xd00 || revision > 0xe00) - netdev_warn(ndev, "TSE revision %x\n", revision); - - if (netif_msg_probe(priv)) - dev_info(&pdev->dev, "Altera TSE MAC version %d.%d at 0x%08lx irq %d/%d\n", - (revision >> 8) & 0xff, revision & 0xff, - (unsigned long) control_port->start, priv->rx_irq, - priv->tx_irq); - snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", dev_name(&pdev->dev)); pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); if (IS_ERR(pcs_bus)) { @@ -1441,12 +1423,30 @@ static int altera_tse_probe(struct platform_device *pdev) goto err_init_phylink; } + ret = register_netdev(ndev); + if (ret) { + dev_err(&pdev->dev, "failed to register TSE net device\n"); + goto err_register_netdev; + } + + revision = ioread32(&priv->mac_dev->megacore_revision); + + if (revision < 0xd00 || revision > 0xe00) + netdev_warn(ndev, "TSE revision %x\n", revision); + + if (netif_msg_probe(priv)) +
dev_info(&pdev->dev, "Altera TSE MAC version %d.%d at 0x%08lx irq %d/%d\n", + (revision >> 8) & 0xff, revision & 0xff, + (unsigned long)control_port->start, priv->rx_irq, + priv->tx_irq); + return 0; + +err_register_netdev: + phylink_destroy(priv->phylink); err_init_phylink: lynx_pcs_destroy(priv->pcs); err_init_pcs: - unregister_netdev(ndev); -err_register_netdev: netif_napi_del(&priv->napi); altera_tse_mdio_destroy(ndev); err_free_netdev: From c9445e3c087656e01d0160a48f90389856baf368 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 Oct 2025 22:41:19 +0100 Subject: [PATCH 565/867] net: phy: fixed_phy: add helper fixed_phy_register_100fd In few places a 100FD fixed PHY is used. Create a helper so that users don't have to define the struct fixed_phy_status. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/bf564b19-e9bc-4896-aeae-9f721cc4fecd@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 12 ++++++++++++ include/linux/phy_fixed.h | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 0e1b28f06f189..bdc3a4bffeded 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -227,6 +227,18 @@ struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, } EXPORT_SYMBOL_GPL(fixed_phy_register); +struct phy_device *fixed_phy_register_100fd(void) +{ + static const struct fixed_phy_status status = { + .link = 1, + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + }; + + return fixed_phy_register(&status, NULL); +} +EXPORT_SYMBOL_GPL(fixed_phy_register_100fd); + void fixed_phy_unregister(struct phy_device *phy) { phy_device_remove(phy); diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index d17ff750c7082..08275ef641478 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -20,6 +20,7 @@ extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); void fixed_phy_add(const 
struct fixed_phy_status *status); struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); +struct phy_device *fixed_phy_register_100fd(void); extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, @@ -34,6 +35,11 @@ fixed_phy_register(const struct fixed_phy_status *status, return ERR_PTR(-ENODEV); } +static inline struct phy_device *fixed_phy_register_100fd(void) +{ + return ERR_PTR(-ENODEV); +} + static inline void fixed_phy_unregister(struct phy_device *phydev) { } From dc86b621e1b4129cc9ceea09ee449ae97fcf106f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 Oct 2025 22:42:30 +0100 Subject: [PATCH 566/867] net: fec: register a fixed phy using fixed_phy_register_100fd if needed In case of coldfire/5272 a fixed phy is used, which so far is created by platform code, using fixed_phy_add(). This function has a number of problems, therefore create a potentially needed fixed phy here, using fixed_phy_register_100fd. Note 1: This includes a small functional change, as coldfire/5272 created a fixed phy in half-duplex mode. Likely this was by mistake, because the fec MAC is 100FD-capable, and connection is to a switch. Note 2: Usage of phy_find_next() makes use of the fact that dev_id can only be 0 or 1. Due to lack of hardware, this is compile-tested only. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/adf4dc5c-5fa3-4ae6-a75c-a73954dede73@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/Kconfig | 1 + drivers/net/ethernet/freescale/fec_main.c | 52 +++++++++++------------ 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index bbef47c3480c5..e2a591cf9601f 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -28,6 +28,7 @@ config FEC depends on PTP_1588_CLOCK_OPTIONAL select CRC32 select PHYLIB + select FIXED_PHY if M5272 select PAGE_POOL imply PAGE_POOL_STATS imply NET_SELFTESTS diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 024dd443bfbc3..742f3e81cc7cc 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -2472,11 +2473,8 @@ static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep, static int fec_enet_mii_probe(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - struct phy_device *phy_dev = NULL; - char mdio_bus_id[MII_BUS_ID_SIZE]; - char phy_name[MII_BUS_ID_SIZE + 3]; - int phy_id; - int dev_id = fep->dev_id; + struct phy_device *phy_dev; + int ret; if (fep->phy_node) { phy_dev = of_phy_connect(ndev, fep->phy_node, @@ -2488,30 +2486,28 @@ static int fec_enet_mii_probe(struct net_device *ndev) } } else { /* check for attached phy */ - for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { - if (!mdiobus_is_registered_device(fep->mii_bus, phy_id)) - continue; - if (dev_id--) - continue; - strscpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); - break; - } + phy_dev = phy_find_first(fep->mii_bus); + if (fep->dev_id && phy_dev) + phy_dev = phy_find_next(fep->mii_bus, phy_dev); - if (phy_id >= PHY_MAX_ADDR) { + if 
(!phy_dev) { netdev_info(ndev, "no PHY, assuming direct connection to switch\n"); - strscpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); - phy_id = 0; + phy_dev = fixed_phy_register_100fd(); + if (IS_ERR(phy_dev)) { + netdev_err(ndev, "could not register fixed PHY\n"); + return PTR_ERR(phy_dev); + } } - snprintf(phy_name, sizeof(phy_name), - PHY_ID_FMT, mdio_bus_id, phy_id); - phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, - fep->phy_interface); - } + ret = phy_connect_direct(ndev, phy_dev, &fec_enet_adjust_link, + fep->phy_interface); + if (ret) { + if (phy_is_pseudo_fixed_link(phy_dev)) + fixed_phy_unregister(phy_dev); + netdev_err(ndev, "could not attach to PHY\n"); + return ret; + } - if (IS_ERR(phy_dev)) { - netdev_err(ndev, "could not attach to PHY\n"); - return PTR_ERR(phy_dev); } /* mask with MAC supported features */ @@ -3616,8 +3612,9 @@ static int fec_enet_close(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phy_dev = ndev->phydev; - phy_stop(ndev->phydev); + phy_stop(phy_dev); if (netif_device_present(ndev)) { napi_disable(&fep->napi); @@ -3625,7 +3622,10 @@ fec_enet_close(struct net_device *ndev) fec_stop(ndev); } - phy_disconnect(ndev->phydev); + phy_disconnect(phy_dev); + + if (!fep->phy_node && phy_is_pseudo_fixed_link(phy_dev)) + fixed_phy_unregister(phy_dev); if (fep->quirks & FEC_QUIRK_ERR006687) imx6q_cpuidle_fec_irqs_unused(); From 0ee21f39c5d844e0b30ea323542b39ce73b3dd86 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 Oct 2025 22:43:36 +0100 Subject: [PATCH 567/867] m68k: coldfire: remove creating a fixed phy Now that the fec ethernet driver creates a fixed phy if needed, we can remove this here. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/212e0cb5-a2f5-460f-8e03-3c3369d0acf1@gmail.com Signed-off-by: Jakub Kicinski --- arch/m68k/coldfire/m5272.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index 918e2a3236c5b..28b3ffa25ba0b 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -103,23 +102,9 @@ void __init config_BSP(char *commandp, int size) /***************************************************************************/ -/* - * Some 5272 based boards have the FEC ethernet directly connected to - * an ethernet switch. In this case we need to use the fixed phy type, - * and we need to declare it early in boot. - */ -static const struct fixed_phy_status nettel_fixed_phy_status __initconst = { - .link = 1, - .speed = 100, - .duplex = 0, -}; - -/***************************************************************************/ - static int __init init_BSP(void) { m5272_uarts_init(); - fixed_phy_add(&nettel_fixed_phy_status); clkdev_add_table(m5272_clk_lookup, ARRAY_SIZE(m5272_clk_lookup)); return 0; } From 10d2f15afba2391471576846fd22d49631e34b21 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 Oct 2025 22:44:35 +0100 Subject: [PATCH 568/867] net: b44: register a fixed phy using fixed_phy_register_100fd if needed In case of bcm47xx a fixed phy is used, which so far is created by platform code, using fixed_phy_add(). This function has a number of problems, therefore create a potentially needed fixed phy here, using fixed_phy_register_100fd. Due to lack of hardware, this is compile-tested only. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/53e4e74d-a49e-4f37-b970-5543a35041db@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 1 + drivers/net/ethernet/broadcom/b44.c | 37 +++++++++++++++------------ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 9fdef874f5ca3..666522d647751 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -25,6 +25,7 @@ config B44 select SSB select MII select PHYLIB + select FIXED_PHY if BCM47XX help If you have a network (Ethernet) controller of this type, say Y or M here. diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 0353359c3fe96..888f28f11406f 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -2233,7 +2234,6 @@ static int b44_register_phy_one(struct b44 *bp) struct mii_bus *mii_bus; struct ssb_device *sdev = bp->sdev; struct phy_device *phydev; - char bus_id[MII_BUS_ID_SIZE + 3]; struct ssb_sprom *sprom = &sdev->bus->sprom; int err; @@ -2260,27 +2260,26 @@ static int b44_register_phy_one(struct b44 *bp) goto err_out_mdiobus; } - if (!mdiobus_is_registered_device(bp->mii_bus, bp->phy_addr) && - (sprom->boardflags_lo & (B44_BOARDFLAG_ROBO | B44_BOARDFLAG_ADM))) { - + phydev = mdiobus_get_phy(bp->mii_bus, bp->phy_addr); + if (!phydev && + sprom->boardflags_lo & (B44_BOARDFLAG_ROBO | B44_BOARDFLAG_ADM)) { dev_info(sdev->dev, "could not find PHY at %i, use fixed one\n", bp->phy_addr); - bp->phy_addr = 0; - snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, "fixed-0", - bp->phy_addr); - } else { - snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, mii_bus->id, - bp->phy_addr); + phydev = fixed_phy_register_100fd(); + if (!IS_ERR(phydev)) + bp->phy_addr = phydev->mdio.addr; } - phydev = 
phy_connect(bp->dev, bus_id, &b44_adjust_link, - PHY_INTERFACE_MODE_MII); - if (IS_ERR(phydev)) { + if (IS_ERR_OR_NULL(phydev)) + err = -ENODEV; + else + err = phy_connect_direct(bp->dev, phydev, &b44_adjust_link, + PHY_INTERFACE_MODE_MII); + if (err) { dev_err(sdev->dev, "could not attach PHY at %i\n", bp->phy_addr); - err = PTR_ERR(phydev); goto err_out_mdiobus_unregister; } @@ -2293,7 +2292,6 @@ static int b44_register_phy_one(struct b44 *bp) linkmode_copy(phydev->advertising, phydev->supported); bp->old_link = 0; - bp->phy_addr = phydev->mdio.addr; phy_attached_info(phydev); @@ -2311,10 +2309,15 @@ static int b44_register_phy_one(struct b44 *bp) static void b44_unregister_phy_one(struct b44 *bp) { - struct net_device *dev = bp->dev; struct mii_bus *mii_bus = bp->mii_bus; + struct net_device *dev = bp->dev; + struct phy_device *phydev; + + phydev = dev->phydev; - phy_disconnect(dev->phydev); + phy_disconnect(phydev); + if (phy_is_pseudo_fixed_link(phydev)) + fixed_phy_unregister(phydev); mdiobus_unregister(mii_bus); mdiobus_free(mii_bus); } From 458639c42b7e6927a746bbbf0954ce3dd738c468 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 Oct 2025 22:45:30 +0100 Subject: [PATCH 569/867] MIPS: BCM47XX: remove creating a fixed phy Now that b44 ethernet driver creates a fixed phy if needed, we can remove this here. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/8983b705-6bca-4728-9283-7aa60f49340f@gmail.com Signed-off-by: Jakub Kicinski --- arch/mips/bcm47xx/setup.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index a93a4266dc1ef..38ed61b4bd962 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -256,12 +256,6 @@ static int __init bcm47xx_cpu_fixes(void) } arch_initcall(bcm47xx_cpu_fixes); -static const struct fixed_phy_status bcm47xx_fixed_phy_status __initconst = { - .link = 1, - .speed = SPEED_100, - .duplex = DUPLEX_FULL, -}; - static int __init bcm47xx_register_bus_complete(void) { switch (bcm47xx_bus_type) { @@ -282,7 +276,6 @@ static int __init bcm47xx_register_bus_complete(void) bcm47xx_leds_register(); bcm47xx_workarounds(); - fixed_phy_add(&bcm47xx_fixed_phy_status); return 0; } device_initcall(bcm47xx_register_bus_complete); From 5de9ea1c50f061892625388880e83fdc50a4ef66 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 30 Oct 2025 22:46:32 +0100 Subject: [PATCH 570/867] net: phy: fixed_phy: remove fixed_phy_add fixed_phy_add() has a number of problems/disadvantages: - It uses phy address 0 w/o checking whether a fixed phy with this address exists already. - A subsequent call to fixed_phy_register() would also use phy address 0, because fixed_phy_add() doesn't mark it as used. - fixed_phy_add() is used from platform code, therefore requires that fixed_phy code is built-in. Now that for the only two users (coldfire/5272 and bcm47xx) fixed_phy creation has been moved to the respective ethernet driver (fec, b44), we can remove fixed_phy_add(). 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/bee046a1-1e77-4057-8b04-fdb2a1bbbd08@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 6 ------ include/linux/phy_fixed.h | 2 -- 2 files changed, 8 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index bdc3a4bffeded..d498d8a9bba6f 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -131,12 +131,6 @@ static int __fixed_phy_add(int phy_addr, return 0; } -void fixed_phy_add(const struct fixed_phy_status *status) -{ - __fixed_phy_add(0, status); -} -EXPORT_SYMBOL_GPL(fixed_phy_add); - static DEFINE_IDA(phy_fixed_ida); static void fixed_phy_del(int phy_addr) diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 08275ef641478..8bade999831c8 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,7 +17,6 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -void fixed_phy_add(const struct fixed_phy_status *status); struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); struct phy_device *fixed_phy_register_100fd(void); @@ -27,7 +26,6 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline void fixed_phy_add(const struct fixed_phy_status *status) {} static inline struct phy_device * fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) From 0c716703965ffc5ef4311b65cb5d84a703784717 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 30 Oct 2025 21:44:38 +0700 Subject: [PATCH 571/867] virtio-net: fix received length check in big packets Since commit 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets"), when guest gso is off, the allocated size for big packets is not MAX_SKB_FRAGS * PAGE_SIZE anymore but depends 
on negotiated MTU. The number of allocated frags for big packets is stored in vi->big_packets_num_skbfrags. Because the host announced buffer length can be malicious (e.g. the host vhost_net driver's get_rx_bufs is modified to announce incorrect length), we need a check in virtio_net receive path. Currently, the check is not adapted to the new change which can lead to NULL page pointer dereference in the below while loop when receiving length that is larger than the allocated one. This commit fixes the received length check corresponding to the new change. Fixes: 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets") Cc: stable@vger.kernel.org Signed-off-by: Bui Quang Minh Reviewed-by: Xuan Zhuo Tested-by: Lei Yang Link: https://patch.msgid.link/20251030144438.7582-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e6e650bc3bc32..8855a994e12b8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, goto ok; } - /* - * Verify that we can indeed put this data into a skb. - * This is here to handle cases when the device erroneously - * tries to receive more than is possible. This is usually - * the case of a broken device. 
- */ - if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { - net_dbg_ratelimited("%s: too much data\n", skb->dev->name); - dev_kfree_skb(skb); - return NULL; - } BUG_ON(offset >= PAGE_SIZE); while (len) { unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); @@ -2112,9 +2101,19 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); + struct sk_buff *skb; + + /* Make sure that len does not exceed the size allocated in + * add_recvbuf_big. + */ + if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { + pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", + dev->name, len, + (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); + goto err; + } + skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(!skb)) goto err; From bf33247a90d3e85d53a9b55bb276b725456ff0bf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 16:26:09 -0800 Subject: [PATCH 572/867] net: Add struct sockaddr_unsized for sockaddr of unknown length Add flexible sockaddr structure to support addresses longer than the traditional 14-byte struct sockaddr::sa_data limitation without requiring the full 128-byte sa_data of struct sockaddr_storage. This allows the network APIs to pass around a pointer to an object that isn't lying to the compiler about how big it is, but must be accompanied by its actual size as an additional parameter. 
It's possible we may want to migrate to including the size with the struct in the future, e.g.: struct sockaddr_unsized { u16 sa_data_len; u16 sa_family; u8 sa_data[] __counted_by(sa_data_len); }; Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-1-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/socket.h b/include/linux/socket.h index 3b262487ec060..7b1a01be29da8 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -40,6 +40,23 @@ struct sockaddr { }; }; +/** + * struct sockaddr_unsized - Unspecified size sockaddr for callbacks + * @sa_family: Address family (AF_UNIX, AF_INET, AF_INET6, etc.) + * @sa_data: Flexible array for address data + * + * This structure is designed for callback interfaces where the + * total size is known via the sockaddr_len parameter. Unlike struct + * sockaddr which has a fixed 14-byte sa_data limit or struct + * sockaddr_storage which has a fixed 128-byte sa_data limit, this + * structure can accommodate addresses of any size, but must be used + * carefully. + */ +struct sockaddr_unsized { + __kernel_sa_family_t sa_family; /* address family, AF_xxx */ + char sa_data[]; /* flexible address data */ +}; + struct linger { int l_onoff; /* Linger active */ int l_linger; /* How long to linger for */ From 0e50474fa514822e9d990874e554bf8043a201d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 16:26:10 -0800 Subject: [PATCH 573/867] net: Convert proto_ops bind() callbacks to use sockaddr_unsized Update all struct proto_ops bind() callback function prototypes from "struct sockaddr *" to "struct sockaddr_unsized *" to avoid lying to the compiler about object sizes. Calls into struct proto handlers gain casts that will be removed in the struct proto conversion patch. No binary changes expected. 
Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-2-kees@kernel.org Signed-off-by: Jakub Kicinski --- crypto/af_alg.c | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/infiniband/hw/erdma/erdma_cm.c | 4 ++-- drivers/infiniband/sw/siw/siw_cm.c | 6 +++--- drivers/isdn/mISDN/l1oip_core.c | 2 +- drivers/isdn/mISDN/socket.c | 4 ++-- drivers/net/ppp/pptp.c | 4 ++-- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/tcp.c | 2 +- drivers/target/iscsi/iscsi_target_login.c | 2 +- drivers/xen/pvcalls-back.c | 2 +- fs/afs/rxrpc.c | 6 +++--- fs/dlm/lowcomms.c | 6 +++--- fs/ocfs2/cluster/tcp.c | 4 ++-- fs/smb/client/connect.c | 2 +- fs/smb/server/transport_tcp.c | 4 ++-- include/linux/net.h | 4 ++-- include/net/inet_common.h | 2 +- include/net/ipv6.h | 2 +- include/net/sock.h | 2 +- net/9p/trans_fd.c | 2 +- net/appletalk/ddp.c | 2 +- net/atm/pvc.c | 4 ++-- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/iso.c | 4 ++-- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/core.c | 4 ++-- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/can/isotp.c | 2 +- net/can/j1939/socket.c | 2 +- net/can/raw.c | 2 +- net/core/sock.c | 2 +- net/ieee802154/socket.c | 4 ++-- net/ipv4/af_inet.c | 4 ++-- net/ipv4/udp_tunnel_core.c | 2 +- net/ipv6/af_inet6.c | 4 ++-- net/ipv6/ip6_udp_tunnel.c | 2 +- net/iucv/af_iucv.c | 2 +- net/l2tp/l2tp_core.c | 4 ++-- net/llc/af_llc.c | 2 +- net/mctp/af_mctp.c | 2 +- net/mctp/test/route-test.c | 2 +- net/mptcp/protocol.c | 6 +++--- net/mptcp/subflow.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 2 +- net/nfc/llcp_sock.c | 4 ++-- net/packet/af_packet.c | 11 ++++++----- net/phonet/socket.c | 8 ++++---- net/qrtr/af_qrtr.c | 2 +- net/qrtr/ns.c | 2 +- net/rds/bind.c | 2 +- net/rds/rds.h | 2 +- net/rds/tcp_connect.c | 2 +- net/rds/tcp_listen.c | 2 +- net/rose/af_rose.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- 
net/rxrpc/rxperf.c | 2 +- net/smc/af_smc.c | 2 +- net/smc/smc.h | 2 +- net/socket.c | 6 +++--- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/svcsock.c | 2 +- net/sunrpc/xprtsock.c | 4 ++-- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 4 ++-- net/vmw_vsock/af_vsock.c | 4 ++-- net/x25/af_x25.c | 2 +- net/xdp/xsk.c | 2 +- tools/testing/selftests/bpf/test_kmods/bpf_testmod.c | 2 +- 74 files changed, 113 insertions(+), 112 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ca6fdcc6c54ac..5e760ab626183 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -145,7 +145,7 @@ void af_alg_release_parent(struct sock *sk) } EXPORT_SYMBOL_GPL(af_alg_release_parent); -static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int alg_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index caaf2781136d0..d9296f74f9025 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -450,7 +450,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection) * a free one dynamically. 
*/ what = "bind before connect"; - err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); + err = sock->ops->bind(sock, (struct sockaddr_unsized *) &src_in6, my_addr_len); if (err < 0) goto out; @@ -537,7 +537,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); what = "bind before listen"; - err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); + err = s_listen->ops->bind(s_listen, (struct sockaddr_unsized *)&my_addr, my_addr_len); if (err < 0) goto out; diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index e0acc185e7193..ef66a6359eb9f 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -993,7 +993,7 @@ static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, int ret; sock_set_reuseaddr(s->sk); - ret = s->ops->bind(s, laddr, laddrlen); + ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr, laddrlen); if (ret) return ret; ret = s->ops->connect(s, raddr, raddrlen, flags); @@ -1315,7 +1315,7 @@ int erdma_create_listen(struct iw_cm_id *id, int backlog) if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) s->sk->sk_bound_dev_if = dev->netdev->ifindex; - ret = s->ops->bind(s, (struct sockaddr *)laddr, + ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr, sizeof(struct sockaddr_in)); if (ret) goto error; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 708b13993fdfd..7fe118cacb3ff 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1340,7 +1340,7 @@ static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, return rv; } - rv = s->ops->bind(s, laddr, size); + rv = s->ops->bind(s, (struct sockaddr_unsized *)laddr, size); if (rv < 0) return rv; @@ -1789,7 +1789,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) goto error; } } - rv = 
s->ops->bind(s, (struct sockaddr *)laddr, + rv = s->ops->bind(s, (struct sockaddr_unsized *)laddr, sizeof(struct sockaddr_in)); } else { struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); @@ -1813,7 +1813,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) goto error; } } - rv = s->ops->bind(s, (struct sockaddr *)laddr, + rv = s->ops->bind(s, (struct sockaddr_unsized *)laddr, sizeof(struct sockaddr_in6)); } if (rv) { diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index f732f6614d370..6ab036e4a35fb 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -676,7 +676,7 @@ l1oip_socket_thread(void *data) hc->sin_remote.sin_port = htons((unsigned short)hc->remoteport); /* bind to incoming port */ - if (socket->ops->bind(socket, (struct sockaddr *)&hc->sin_local, + if (socket->ops->bind(socket, (struct sockaddr_unsized *)&hc->sin_local, sizeof(hc->sin_local))) { printk(KERN_ERR "%s: Failed to bind socket to port %d.\n", __func__, hc->localport); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index b215b28cad7b7..77b900db1cac2 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -462,7 +462,7 @@ static int data_sock_getsockopt(struct socket *sock, int level, int optname, } static int -data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +data_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; struct sock *sk = sock->sk; @@ -696,7 +696,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } static int -base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +base_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; struct sock *sk = sock->sk; diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 
90737cb718928..d07e87a0974c0 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -382,8 +382,8 @@ static int pptp_rcv(struct sk_buff *skb) return NET_RX_DROP; } -static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len) +static int pptp_bind(struct socket *sock, struct sockaddr_unsized *uservaddr, + int sockaddr_len) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9a96df1a511c0..35d0bd91f6fd7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1834,7 +1834,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, sk_set_memalloc(queue->sock->sk); if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) { - ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr, + ret = kernel_bind(queue->sock, (struct sockaddr_unsized *)&ctrl->src_addr, sizeof(ctrl->src_addr)); if (ret) { dev_err(nctrl->device, diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 470bf37e5a637..d543da09ef8e2 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -2055,7 +2055,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) if (so_priority > 0) sock_set_priority(port->sock->sk, so_priority); - ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, + ret = kernel_bind(port->sock, (struct sockaddr_unsized *)&port->addr, sizeof(port->addr)); if (ret) { pr_err("failed to bind port socket %d\n", ret); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index c2ac9a99ebbb2..53aca059dc16d 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -822,7 +822,7 @@ int iscsit_setup_np( sock_set_reuseaddr(sock->sk); ip_sock_set_freebind(sock->sk); - ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len); + ret = kernel_bind(sock, (struct sockaddr_unsized 
*)&np->np_sockaddr, len); if (ret < 0) { pr_err("kernel_bind() failed: %d\n", ret); goto fail; diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index fd7ed65e0197d..da1b516b9cfdd 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -650,7 +650,7 @@ static int pvcalls_back_bind(struct xenbus_device *dev, if (ret < 0) goto out; - ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr, + ret = inet_bind(map->sock, (struct sockaddr_unsized *)&req->u.bind.addr, req->u.bind.len); if (ret < 0) goto out; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c1cadf8fb346a..bf0e4ea0aafdf 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -82,16 +82,16 @@ int afs_open_socket(struct afs_net *net) if (ret < 0) pr_err("Couldn't create RxGK CM key: %d\n", ret); - ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; - ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx)); } if (ret < 0) goto error_2; srx.srx_service = YFS_CM_SERVICE; - ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx)); if (ret < 0) goto error_2; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9a0b6c2b6b01e..0500421b6e3bd 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1134,7 +1134,7 @@ static int sctp_bind_addrs(struct socket *sock, __be16 port) make_sockaddr(&localaddr, port, &addr_len); if (!i) - result = kernel_bind(sock, addr, addr_len); + result = kernel_bind(sock, (struct sockaddr_unsized *)addr, addr_len); else result = sock_bind_add(sock->sk, addr, addr_len); @@ -1813,7 +1813,7 @@ static int dlm_tcp_bind(struct socket *sock) memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, 
&addr_len); - result = kernel_bind(sock, (struct sockaddr *)&src_addr, + result = kernel_bind(sock, (struct sockaddr_unsized *)&src_addr, addr_len); if (result < 0) { /* This *may* not indicate a critical error */ @@ -1852,7 +1852,7 @@ static int dlm_tcp_listen_bind(struct socket *sock) /* Bind to our port */ make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); - return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], + return kernel_bind(sock, (struct sockaddr_unsized *)&dlm_local_addr[0], addr_len); } diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b05d4e9d13b28..c7734193d8d7b 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1615,7 +1615,7 @@ static void o2net_start_connect(struct work_struct *work) myaddr.sin_addr.s_addr = mynode->nd_ipv4_address; myaddr.sin_port = htons(0); /* any port */ - ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, + ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&myaddr, sizeof(myaddr)); if (ret) { mlog(ML_ERROR, "bind failed with %d at address %pI4\n", @@ -2002,7 +2002,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) INIT_WORK(&o2net_listen_work, o2net_accept_many); sock->sk->sk_reuse = SK_CAN_REUSE; - ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&sin, sizeof(sin)); if (ret < 0) { printk(KERN_ERR "o2net: Error %d while binding socket at " "%pI4:%u\n", ret, &addr, ntohs(port)); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index dd12f3eb61dcb..96d9722630203 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3112,7 +3112,7 @@ bind_socket(struct TCP_Server_Info *server) struct socket *socket = server->ssocket; rc = kernel_bind(socket, - (struct sockaddr *) &server->srcaddr, + (struct sockaddr_unsized *) &server->srcaddr, sizeof(server->srcaddr)); if (rc < 0) { struct sockaddr_in *saddr4; diff --git 
a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 7a1e3dcc2cdee..bf694bc78c655 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -519,10 +519,10 @@ static int create_socket(struct interface *iface) } if (ipv4) - ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin, + ret = kernel_bind(ksmbd_socket, (struct sockaddr_unsized *)&sin, sizeof(sin)); else - ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin6, + ret = kernel_bind(ksmbd_socket, (struct sockaddr_unsized *)&sin6, sizeof(sin6)); if (ret) { pr_err("Failed to bind socket: %d\n", ret); diff --git a/include/linux/net.h b/include/linux/net.h index ec09620f40f70..0e316f0631130 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -163,7 +163,7 @@ struct proto_ops { struct module *owner; int (*release) (struct socket *sock); int (*bind) (struct socket *sock, - struct sockaddr *myaddr, + struct sockaddr_unsized *myaddr, int sockaddr_len); int (*connect) (struct socket *sock, struct sockaddr *vaddr, @@ -345,7 +345,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); -int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen); +int kernel_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen); int kernel_listen(struct socket *sock, int backlog); int kernel_accept(struct socket *sock, struct socket **newsock, int flags); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, diff --git a/include/net/inet_common.h b/include/net/inet_common.h index c17a6585d0b0b..1666cf6f539e1 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -42,7 +42,7 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); int __inet_listen_sk(struct sock *sk, int backlog); void inet_sock_destruct(struct sock *sk); -int 
inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +int inet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); /* Don't allocate port at this moment, defer to connect. */ #define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2ccdf85f34f16..2188bad9a687d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1208,7 +1208,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); -int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); diff --git a/include/net/sock.h b/include/net/sock.h index c7e58b8e8a907..acbb78c96d695 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1920,7 +1920,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, * Functions to fill in entries in struct proto_ops when a protocol * does not implement a particular function. 
*/ -int sock_no_bind(struct socket *, struct sockaddr *, int); +int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a516745f732f7..ef517bb307e27 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -966,7 +966,7 @@ static int p9_bind_privport(struct socket *sock) ((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port); else ((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port); - err = kernel_bind(sock, (struct sockaddr *)&stor, sizeof(stor)); + err = kernel_bind(sock, (struct sockaddr_unsized *)&stor, sizeof(stor)); if (err != -EADDRINUSE) break; } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 30242fe103419..45db43cde67f1 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1149,7 +1149,7 @@ static int atalk_autobind(struct sock *sk) } /* Set the address 'our end' of the connection */ -static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int atalk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_at *addr = (struct sockaddr_at *)uaddr; struct sock *sk = sock->sk; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 66d9a9bd58967..62fdf07c53dea 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -24,7 +24,7 @@ static int pvc_shutdown(struct socket *sock, int how) return 0; } -static int pvc_bind(struct socket *sock, struct sockaddr *sockaddr, +static int pvc_bind(struct socket *sock, struct sockaddr_unsized *sockaddr, int sockaddr_len) { struct sock *sk = sock->sk; @@ -59,7 +59,7 @@ static int pvc_bind(struct socket *sock, struct sockaddr *sockaddr, static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr, int sockaddr_len, int flags) { - return 
pvc_bind(sock, sockaddr, sockaddr_len); + return pvc_bind(sock, (struct sockaddr_unsized *)sockaddr, sockaddr_len); } static int pvc_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/svc.c b/net/atm/svc.c index f8137ae693b08..1906a493c8aa0 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -97,7 +97,7 @@ static int svc_release(struct socket *sock) return 0; } -static int svc_bind(struct socket *sock, struct sockaddr *sockaddr, +static int svc_bind(struct socket *sock, struct sockaddr_unsized *sockaddr, int sockaddr_len) { DEFINE_WAIT(wait); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6ef8b2a57a9bf..23c558ff96822 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1094,7 +1094,7 @@ static int ax25_release(struct socket *sock) * that we've implemented support for SO_BINDTODEVICE. It is however small * and trivially backward compatible. */ -static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int ax25_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index fc866759910d9..ba9f48771e11d 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1185,7 +1185,7 @@ static int hci_sock_compat_ioctl(struct socket *sock, unsigned int cmd, } #endif -static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, +static int hci_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_hci haddr; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 3d98cb6291da6..6a7e1b4a8701b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -944,7 +944,7 @@ static int iso_sock_create(struct net *net, struct socket *sock, int protocol, return 0; } -static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, +static int 
iso_sock_bind_bc(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; @@ -1022,7 +1022,7 @@ static int iso_sock_bind_pa_sk(struct sock *sk, struct sockaddr_iso *sa, return err; } -static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, +static int iso_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 814fb8610ac43..ca7394d8fa4e2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -80,7 +80,7 @@ static int l2cap_validate_le_psm(u16 psm) return 0; } -static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +static int l2cap_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int alen) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 96250807b32b4..d62fd6c576175 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -781,7 +781,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_psm = 0; addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; - *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + *err = kernel_bind(sock, (struct sockaddr_unsized *)&addr, sizeof(addr)); if (*err < 0) goto failed; @@ -2068,7 +2068,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; - err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + err = kernel_bind(sock, (struct sockaddr_unsized *)&addr, sizeof(addr)); if (err < 0) { BT_ERR("Bind failed %d", err); goto failed; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 913402806fa0d..8c8762bbc6de3 100644 --- a/net/bluetooth/rfcomm/sock.c +++ 
b/net/bluetooth/rfcomm/sock.c @@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock, return 0; } -static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int rfcomm_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_rc sa; struct sock *sk = sock->sk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ab0cf442d57b9..01d878205e582 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -605,7 +605,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, return 0; } -static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, +static int sco_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; diff --git a/net/can/isotp.c b/net/can/isotp.c index 74ee1e52249b2..ce588b85665a0 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1246,7 +1246,7 @@ static int isotp_release(struct socket *sock) return 0; } -static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) +static int isotp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int len) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 88e7160d42489..a2abedc757d0f 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -440,7 +440,7 @@ static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len) return 0; } -static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) +static int j1939_sk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int len) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct j1939_sock *jsk = j1939_sk(sock->sk); diff --git a/net/can/raw.c b/net/can/raw.c index a53853f5e9afc..f36a83d3447cf 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -449,7 +449,7 @@ static int 
raw_release(struct socket *sock) return 0; } -static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) +static int raw_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int len) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; diff --git a/net/core/sock.c b/net/core/sock.c index 7a9bbc2afcf08..1e1ce18bba161 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3462,7 +3462,7 @@ EXPORT_SYMBOL_GPL(sk_set_peek_off); * function, some default processing is provided. */ -int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) +int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len) { return -EOPNOTSUPP; } diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 18d267921bb53..99ddfad9bb88f 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -96,13 +96,13 @@ static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg, return sk->sk_prot->sendmsg(sk, msg, len); } -static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, +static int ieee802154_sock_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); + return sk->sk_prot->bind(sk, (struct sockaddr *)uaddr, addr_len); return sock_no_bind(sock, uaddr, addr_len); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0784e2a873a15..aa43d16e48ff1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -464,9 +464,9 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) return __inet_bind(sk, uaddr, addr_len, flags); } -int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +int inet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { - return inet_bind_sk(sock->sk, uaddr, addr_len); + return inet_bind_sk(sock->sk, (struct sockaddr *)uaddr, addr_len); } 
EXPORT_SYMBOL(inet_bind); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 54386e06a8136..11e5a88c923de 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -29,7 +29,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = cfg->local_udp_port; - err = kernel_bind(sock, (struct sockaddr *)&udp_addr, + err = kernel_bind(sock, (struct sockaddr_unsized *)&udp_addr, sizeof(udp_addr)); if (err < 0) goto error; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 44d7de1eec4f7..c92d27e35fbcc 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -465,9 +465,9 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) } /* bind for INET6 API */ -int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { - return inet6_bind_sk(sock->sk, uaddr, addr_len); + return inet6_bind_sk(sock->sk, (struct sockaddr *)uaddr, addr_len); } EXPORT_SYMBOL(inet6_bind); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 0ff547a4bff71..b0d9286b33c88 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -40,7 +40,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); udp6_addr.sin6_port = cfg->local_udp_port; - err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, + err = kernel_bind(sock, (struct sockaddr_unsized *)&udp6_addr, sizeof(udp6_addr)); if (err < 0) goto error; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 4ddfc633d30cf..3941e32cda690 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -563,7 +563,7 @@ static void __iucv_auto_name(struct iucv_sock *iucv) } /* Bind an unbound socket */ -static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, 
+static int iucv_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { DECLARE_SOCKADDR(struct sockaddr_iucv *, sa, addr); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 369a2f2e459cd..4b5e372a5cd43 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1503,7 +1503,7 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = tunnel_id; - err = kernel_bind(sock, (struct sockaddr *)&ip6_addr, + err = kernel_bind(sock, (struct sockaddr_unsized *)&ip6_addr, sizeof(ip6_addr)); if (err < 0) goto out; @@ -1530,7 +1530,7 @@ static int l2tp_tunnel_sock_create(struct net *net, ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; - err = kernel_bind(sock, (struct sockaddr *)&ip_addr, + err = kernel_bind(sock, (struct sockaddr_unsized *)&ip_addr, sizeof(ip_addr)); if (err < 0) goto out; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5958a80fe14cf..e5bb0c0d708cc 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -337,7 +337,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) * otherwise all hell will break loose. * Returns: 0 upon success, negative otherwise. 
*/ -static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) +static int llc_ui_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addrlen) { struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index b99ba14f39d2b..5b1ef50637b70 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -49,7 +49,7 @@ static bool mctp_sockaddr_ext_is_ok(const struct sockaddr_mctp_ext *addr) !addr->__smctp_pad0[2]; } -static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +static int mctp_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen) { struct sock *sk = sock->sk; struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 69a3ccfc6310c..be9149ac79dd2 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -205,7 +205,7 @@ static void __mctp_route_test_init(struct kunit *test, addr.smctp_network = netid; addr.smctp_addr.s_addr = 8; addr.smctp_type = 0; - rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr)); + rc = kernel_bind(sock, (struct sockaddr_unsized *)&addr, sizeof(addr)); KUNIT_ASSERT_EQ(test, rc, 0); *devp = dev; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d568575cdcb59..53e2b095dfb12 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3856,7 +3856,7 @@ static struct proto mptcp_prot = { .no_autobind = true, }; -static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int mptcp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct sock *ssk, *sk = sock->sk; @@ -3870,10 +3870,10 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } if (sk->sk_family == AF_INET) - err = inet_bind_sk(ssk, uaddr, addr_len); + err = inet_bind_sk(ssk, (struct 
sockaddr *)uaddr, addr_len); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (sk->sk_family == AF_INET6) - err = inet6_bind_sk(ssk, uaddr, addr_len); + err = inet6_bind_sk(ssk, (struct sockaddr *)uaddr, addr_len); #endif if (!err) mptcp_copy_inaddrs(sk, ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8325890a3223..d90237bf433cd 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1660,7 +1660,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, addrlen = sizeof(struct sockaddr_in6); #endif ssk->sk_bound_dev_if = local->ifindex; - err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); + err = kernel_bind(sf, (struct sockaddr_unsized *)&addr, addrlen); if (err) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR); pr_debug("msk=%p local=%d remote=%d bind error: %d\n", diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3402675bf5215..d8c089ef387cf 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1435,7 +1435,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) sin.sin_addr.s_addr = addr; sin.sin_port = 0; - return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + return kernel_bind(sock, (struct sockaddr_unsized *)&sin, sizeof(sin)); } static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, @@ -1542,7 +1542,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id, get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); sock->sk->sk_bound_dev_if = dev->ifindex; - result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen); + result = kernel_bind(sock, (struct sockaddr_unsized *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 687a84c48882a..18490a56edd02 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -966,7 +966,7 @@ static 
void netlink_undo_bind(int group, long unsigned int groups, nlk->netlink_unbind(sock_net(sk), undo + 1); } -static int netlink_bind(struct socket *sock, struct sockaddr *addr, +static int netlink_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sock *sk = sock->sk; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3331669d8e33a..33468124d53db 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -561,7 +561,7 @@ static int nr_release(struct socket *sock) return 0; } -static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int nr_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 57a2f97004e17..26e6ceb48a82e 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -56,7 +56,7 @@ static struct proto llcp_sock_proto = { .obj_size = sizeof(struct nfc_llcp_sock), }; -static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +static int llcp_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int alen) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -146,7 +146,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) return ret; } -static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, +static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, int alen) { struct sock *sk = sock->sk; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 173e6edda08f8..fccad2a529cc1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3279,11 +3279,12 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, * Bind a packet socket to a device */ -static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, +static int packet_bind_spkt(struct socket *sock, 
struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data_min) + 1]; + struct sockaddr *sa = (struct sockaddr *)uaddr; + char name[sizeof(sa->sa_data_min) + 1]; /* * Check legality @@ -3294,13 +3295,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); - name[sizeof(uaddr->sa_data_min)] = 0; + memcpy(name, sa->sa_data, sizeof(sa->sa_data_min)); + name[sizeof(sa->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } -static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int packet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index db2d552e9b32e..478b026477335 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -153,7 +153,7 @@ EXPORT_SYMBOL(pn_sock_unhash); static DEFINE_MUTEX(port_mutex); -static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) +static int pn_socket_bind(struct socket *sock, struct sockaddr_unsized *addr, int len) { struct sock *sk = sock->sk; struct pn_sock *pn = pn_sk(sk); @@ -163,7 +163,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) u8 saddr; if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, addr, len); + return sk->sk_prot->bind(sk, (struct sockaddr *)addr, len); if (len < sizeof(struct sockaddr_pn)) return -EINVAL; @@ -206,8 +206,8 @@ static int pn_socket_autobind(struct socket *sock) memset(&sa, 0, sizeof(sa)); sa.spn_family = AF_PHONET; - err = pn_socket_bind(sock, (struct sockaddr *)&sa, - sizeof(struct sockaddr_pn)); + err = pn_socket_bind(sock, (struct sockaddr_unsized *)&sa, + sizeof(struct sockaddr_pn)); if (err != -EINVAL) return 
err; BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 00c51cf693f3d..00bd3dd9f0f95 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -824,7 +824,7 @@ static int qrtr_autobind(struct socket *sock) } /* Bind socket to specified sockaddr. */ -static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) +static int qrtr_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); struct qrtr_sock *ipc = qrtr_sk(sock->sk); diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 3de9350cbf307..bfcc1a453f238 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -714,7 +714,7 @@ int qrtr_ns_init(void) sq.sq_port = QRTR_PORT_CTRL; qrtr_ns.local_node = sq.sq_node; - ret = kernel_bind(qrtr_ns.sock, (struct sockaddr *)&sq, sizeof(sq)); + ret = kernel_bind(qrtr_ns.sock, (struct sockaddr_unsized *)&sq, sizeof(sq)); if (ret < 0) { pr_err("failed to bind to socket\n"); goto err_wq; diff --git a/net/rds/bind.c b/net/rds/bind.c index 97a29172a8eec..f800d920d9690 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -160,7 +160,7 @@ void rds_remove_bound(struct rds_sock *rs) rs->rs_bound_addr = in6addr_any; } -int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +int rds_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rds/rds.h b/net/rds/rds.h index 5b1c072e2e7ff..a029e5fcdea72 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -735,7 +735,7 @@ extern wait_queue_head_t rds_poll_waitq; /* bind.c */ -int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +int rds_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); void rds_remove_bound(struct rds_sock *rs); struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __u32 scope_id); diff --git a/net/rds/tcp_connect.c 
b/net/rds/tcp_connect.c index a0046e99d6df7..1eff3b03ab774 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -145,7 +145,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) addrlen = sizeof(sin); } - ret = kernel_bind(sock, addr, addrlen); + ret = kernel_bind(sock, (struct sockaddr_unsized *)addr, addrlen); if (ret) { rdsdebug("bind failed with %d at address %pI6c\n", ret, &conn->c_laddr); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 91e34af3fe5d5..820d3e20de195 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -290,7 +290,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) addr_len = sizeof(*sin); } - ret = kernel_bind(sock, (struct sockaddr *)&ss, addr_len); + ret = kernel_bind(sock, (struct sockaddr_unsized *)&ss, addr_len); if (ret < 0) { rdsdebug("could not bind %s listener socket: %d\n", isv6 ? "IPv6" : "IPv4", ret); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 543f9e8ebb693..47369eab5aecb 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -693,7 +693,7 @@ static int rose_release(struct socket *sock) return 0; } -static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int rose_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 36df0274d7b74..245f37a743943 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -127,7 +127,7 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, /* * bind a local address to an RxRPC socket */ -static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) +static int rxrpc_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len) { struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct rxrpc_local *local; diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index 2ea71e3831f75..98ea76fae70f3 100644 --- 
a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -211,7 +211,7 @@ static int rxperf_open_socket(void) ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring); - ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx)); + ret = kernel_bind(socket, (struct sockaddr_unsized *)&srx, sizeof(srx)); if (ret < 0) goto error_2; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e9d0e62e0b1bc..be18ab08f15d6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -421,7 +421,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, return sk; } -int smc_bind(struct socket *sock, struct sockaddr *uaddr, +int smc_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; diff --git a/net/smc/smc.h b/net/smc/smc.h index 2c90849637398..a008dbe6d6f63 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -42,7 +42,7 @@ void smc_unhash_sk(struct sock *sk); void smc_release_cb(struct sock *sk); int smc_release(struct socket *sock); -int smc_bind(struct socket *sock, struct sockaddr *uaddr, +int smc_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); int smc_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags); diff --git a/net/socket.c b/net/socket.c index e8892b2187087..aaefb2e519a74 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1872,7 +1872,7 @@ int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, addrlen); if (!err) err = READ_ONCE(sock->ops)->bind(sock, - (struct sockaddr *)address, + (struct sockaddr_unsized *)address, addrlen); return err; } @@ -3583,13 +3583,13 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, * Returns 0 or an error. 
*/ -int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +int kernel_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen) { struct sockaddr_storage address; memcpy(&address, addr, addrlen); - return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address, + return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr_unsized *)&address, addrlen); } EXPORT_SYMBOL(kernel_bind); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8ca354ecfd02a..318ee24ad900a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1457,12 +1457,12 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, switch (sap->sa_family) { case AF_INET: err = kernel_bind(sock, - (struct sockaddr *)&rpc_inaddr_loopback, + (struct sockaddr_unsized *)&rpc_inaddr_loopback, sizeof(rpc_inaddr_loopback)); break; case AF_INET6: err = kernel_bind(sock, - (struct sockaddr *)&rpc_in6addr_loopback, + (struct sockaddr_unsized *)&rpc_in6addr_loopback, sizeof(rpc_in6addr_loopback)); break; default: diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7b90abc5cf0ee..16ff6c100821a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1557,7 +1557,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, ip6_sock_set_v6only(sock->sk); if (type == SOCK_STREAM) sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */ - error = kernel_bind(sock, sin, len); + error = kernel_bind(sock, (struct sockaddr_unsized *)sin, len); if (error < 0) goto bummer; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3aa987e7f0724..95732a45b059b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1845,8 +1845,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); do { rpc_set_port((struct sockaddr *)&myaddr, port); - err = kernel_bind(sock, (struct sockaddr *)&myaddr, - transport->xprt.addrlen); + err = kernel_bind(sock, 
(struct sockaddr_unsized *)&myaddr, + transport->xprt.addrlen); if (err == 0) { if (transport->xprt.reuseport) transport->srcport = port; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bc614a1f019c2..3903a97ada7d4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -710,7 +710,7 @@ int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) return res; } -static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) +static int tipc_bind(struct socket *sock, struct sockaddr_unsized *skaddr, int alen) { struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; u32 atype = ua->addrtype; @@ -726,7 +726,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) return -EACCES; } } - return tipc_sk_bind(sock, skaddr, alen); + return tipc_sk_bind(sock, (struct sockaddr *)skaddr, alen); } /** diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 54177caa9c12c..788775f0eea72 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -843,7 +843,7 @@ static int unix_listen(struct socket *sock, int backlog) } static int unix_release(struct socket *); -static int unix_bind(struct socket *, struct sockaddr *, int); +static int unix_bind(struct socket *, struct sockaddr_unsized *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); @@ -1466,7 +1466,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, return err; } -static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int unix_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 76763247a377a..0e5609e7284bb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -987,7 +987,7 @@ static int 
vsock_release(struct socket *sock) } static int -vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +vsock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { int err; struct sock *sk; @@ -995,7 +995,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) sk = sock->sk; - if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + if (vsock_addr_cast((struct sockaddr *)addr, addr_len, &vm_addr) != 0) return -EINVAL; lock_sock(sk); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 655d1e0ae25f7..ca8006d8f7929 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -670,7 +670,7 @@ static int x25_release(struct socket *sock) return 0; } -static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int x25_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sock *sk = sock->sk; struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 2f26c918d4484..ed8b612ec29d0 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1238,7 +1238,7 @@ static bool xsk_validate_queues(struct xdp_sock *xs) return xs->fq_tmp && xs->cq_tmp; } -static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int xsk_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; struct sock *sk = sock->sk; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 8074bc5f6f200..0497b5dea25c0 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -923,7 +923,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args) goto out; } - err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen); + err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen); out: 
mutex_unlock(&sock_lock); From 85cb0757d7e1f9370a8b52a8b8144c37941cba0a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 16:26:11 -0800 Subject: [PATCH 574/867] net: Convert proto_ops connect() callbacks to use sockaddr_unsized Update all struct proto_ops connect() callback function prototypes from "struct sockaddr *" to "struct sockaddr_unsized *" to avoid lying to the compiler about object sizes. Calls into struct proto handlers gain casts that will be removed in the struct proto conversion patch. No binary changes expected. Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-3-kees@kernel.org Signed-off-by: Jakub Kicinski --- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/infiniband/hw/erdma/erdma_cm.c | 2 +- drivers/infiniband/sw/siw/siw_cm.c | 2 +- drivers/net/ppp/pppoe.c | 4 ++-- drivers/net/ppp/pptp.c | 4 ++-- drivers/net/wireless/ath/ath10k/qmi.c | 2 +- drivers/net/wireless/ath/ath11k/qmi.c | 2 +- drivers/net/wireless/ath/ath12k/qmi.c | 2 +- drivers/nvme/host/tcp.c | 2 +- drivers/slimbus/qcom-ngd-ctrl.c | 2 +- drivers/xen/pvcalls-back.c | 2 +- fs/coredump.c | 2 +- fs/dlm/lowcomms.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- fs/smb/client/connect.c | 2 +- include/linux/bpf-cgroup.h | 6 +++--- include/linux/net.h | 4 ++-- include/net/inet_common.h | 6 +++--- include/net/sctp/sctp.h | 2 +- include/net/sock.h | 2 +- include/net/vsock_addr.h | 2 +- net/9p/trans_fd.c | 6 +++--- net/appletalk/ddp.c | 2 +- net/atm/pvc.c | 4 ++-- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/iso.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/bluetooth/rfcomm/sock.c | 3 ++- net/bluetooth/sco.c | 2 +- net/caif/caif_socket.c | 2 +- net/can/bcm.c | 2 +- net/can/j1939/socket.c | 2 +- net/ceph/messenger.c | 2 +- net/core/sock.c | 2 +- net/ieee802154/socket.c | 4 ++-- net/ipv4/af_inet.c | 14 +++++++------- net/ipv4/tcp.c | 2 +- net/ipv4/udp_tunnel_core.c | 2 +- net/ipv6/ip6_udp_tunnel.c | 2 +- 
net/iucv/af_iucv.c | 4 ++-- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_ppp.c | 2 +- net/llc/af_llc.c | 2 +- net/mctp/af_mctp.c | 2 +- net/mctp/test/utils.c | 5 +++-- net/mptcp/subflow.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 4 ++-- net/nfc/llcp_sock.c | 2 +- net/nfc/rawsock.c | 2 +- net/phonet/socket.c | 6 +++--- net/qrtr/af_qrtr.c | 2 +- net/rds/af_rds.c | 2 +- net/rds/tcp_connect.c | 2 +- net/rose/af_rose.c | 3 ++- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 4 ++-- net/smc/af_smc.c | 4 ++-- net/smc/smc.h | 2 +- net/socket.c | 8 ++++---- net/sunrpc/clnt.c | 2 +- net/sunrpc/xprtsock.c | 5 +++-- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 8 ++++---- net/vmw_vsock/af_vsock.c | 6 +++--- net/vmw_vsock/vsock_addr.c | 2 +- net/x25/af_x25.c | 2 +- samples/qmi/qmi_sample_client.c | 2 +- .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 2 +- 72 files changed, 110 insertions(+), 106 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d9296f74f9025..33bc91665fe82 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -458,7 +458,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection) * stay C_WF_CONNECTION, don't go Disconnecting! 
*/ disconnect_on_error = 0; what = "connect"; - err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); + err = sock->ops->connect(sock, (struct sockaddr_unsized *) &peer_in6, peer_addr_len, 0); out: if (err < 0) { diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index ef66a6359eb9f..ed21ba0037a4a 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -996,7 +996,7 @@ static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr, laddrlen); if (ret) return ret; - ret = s->ops->connect(s, raddr, raddrlen, flags); + ret = s->ops->connect(s, (struct sockaddr_unsized *)raddr, raddrlen, flags); return ret < 0 ? ret : 0; } diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7fe118cacb3ff..eb0bd4f79a855 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1344,7 +1344,7 @@ static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, if (rv < 0) return rv; - rv = s->ops->connect(s, raddr, size, flags); + rv = s->ops->connect(s, (struct sockaddr_unsized *)raddr, size, flags); return rv < 0 ? 
rv : 0; } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4ac6afce267b9..4275b393a4544 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -608,8 +608,8 @@ static int pppoe_release(struct socket *sock) return 0; } -static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len, int flags) +static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr, + int sockaddr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index d07e87a0974c0..b18acd8105619 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -415,8 +415,8 @@ static int pptp_bind(struct socket *sock, struct sockaddr_unsized *uservaddr, return error; } -static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len, int flags) +static int pptp_connect(struct socket *sock, struct sockaddr_unsized *uservaddr, + int sockaddr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c index f1f33af0170a0..8275345631a0b 100644 --- a/drivers/net/wireless/ath/ath10k/qmi.c +++ b/drivers/net/wireless/ath/ath10k/qmi.c @@ -986,7 +986,7 @@ static int ath10k_qmi_new_server(struct qmi_handle *qmi_hdl, ath10k_dbg(ar, ATH10K_DBG_QMI, "wifi fw qmi service found\n"); - ret = kernel_connect(qmi_hdl->sock, (struct sockaddr *)&qmi->sq, + ret = kernel_connect(qmi_hdl->sock, (struct sockaddr_unsized *)&qmi->sq, sizeof(qmi->sq), 0); if (ret) { ath10k_err(ar, "failed to connect to a remote QMI service port\n"); diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index aea56c38bf8f3..ff6a97e328b88 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -3177,7 
+3177,7 @@ static int ath11k_qmi_ops_new_server(struct qmi_handle *qmi_hdl, sq->sq_node = service->node; sq->sq_port = service->port; - ret = kernel_connect(qmi_hdl->sock, (struct sockaddr *)sq, + ret = kernel_connect(qmi_hdl->sock, (struct sockaddr_unsized *)sq, sizeof(*sq), 0); if (ret) { ath11k_warn(ab, "failed to connect to qmi remote service: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 36325e62aa242..cf9c25df3ffd0 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3740,7 +3740,7 @@ static int ath12k_qmi_ops_new_server(struct qmi_handle *qmi_hdl, sq->sq_node = service->node; sq->sq_port = service->port; - ret = kernel_connect(qmi_hdl->sock, (struct sockaddr *)sq, + ret = kernel_connect(qmi_hdl->sock, (struct sockaddr_unsized *)sq, sizeof(*sq), 0); if (ret) { ath12k_warn(ab, "qmi failed to connect to remote service %d\n", ret); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 35d0bd91f6fd7..6795b8286c354 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1872,7 +1872,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, dev_dbg(nctrl->device, "connecting queue %d\n", nvme_tcp_queue_id(queue)); - ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr, + ret = kernel_connect(queue->sock, (struct sockaddr_unsized *)&ctrl->addr, sizeof(ctrl->addr), 0); if (ret) { dev_err(nctrl->device, diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 4fb66986cc22e..fdb94dc4a7307 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -463,7 +463,7 @@ static int qcom_slim_qmi_init(struct qcom_slim_ngd_ctrl *ctrl, } rc = kernel_connect(handle->sock, - (struct sockaddr *)&ctrl->qmi.svc_info, + (struct sockaddr_unsized *)&ctrl->qmi.svc_info, sizeof(ctrl->qmi.svc_info), 0); if (rc < 0) { dev_err(ctrl->dev, "Remote Service connect failed: %d\n", 
rc); diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index da1b516b9cfdd..c5b6f6fa11eb7 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -409,7 +409,7 @@ static int pvcalls_back_connect(struct xenbus_device *dev, ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock); if (ret < 0) goto out; - ret = inet_stream_connect(sock, sa, req->u.connect.len, 0); + ret = inet_stream_connect(sock, (struct sockaddr_unsized *)sa, req->u.connect.len, 0); if (ret < 0) { sock_release(sock); goto out; diff --git a/fs/coredump.c b/fs/coredump.c index 5c1c381ee3806..14837d9e2abbe 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -708,7 +708,7 @@ static bool coredump_sock_connect(struct core_name *cn, struct coredump_params * */ pidfs_coredump(cprm); - retval = kernel_connect(socket, (struct sockaddr *)(&addr), addr_len, + retval = kernel_connect(socket, (struct sockaddr_unsized *)(&addr), addr_len, O_NONBLOCK | SOCK_COREDUMP); if (retval) { diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 0500421b6e3bd..f832dafdaca82 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1599,7 +1599,7 @@ static int dlm_connect(struct connection *con) log_print_ratelimited("connecting to %d", con->nodeid); make_sockaddr(&addr, dlm_config.ci_tcp_port, &addr_len); - result = kernel_connect(sock, (struct sockaddr *)&addr, addr_len, 0); + result = kernel_connect(sock, (struct sockaddr_unsized *)&addr, addr_len, 0); switch (result) { case -EINPROGRESS: /* not an error */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index c7734193d8d7b..79b281e32f4ce 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1638,7 +1638,7 @@ static void o2net_start_connect(struct work_struct *work) remoteaddr.sin_port = node->nd_ipv4_port; ret = sc->sc_sock->ops->connect(sc->sc_sock, - (struct sockaddr *)&remoteaddr, + (struct sockaddr_unsized *)&remoteaddr, sizeof(remoteaddr), O_NONBLOCK); if (ret == -EINPROGRESS) diff --git 
a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 96d9722630203..73120988661a9 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3411,7 +3411,7 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); - rc = kernel_connect(socket, saddr, slen, + rc = kernel_connect(socket, (struct sockaddr_unsized *)saddr, slen, server->noblockcnt ? O_NONBLOCK : 0); /* * When mounting SMB root file systems, we do not want to block in diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index aedf573bdb426..a7fb4f46974f2 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -238,7 +238,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, (struct sockaddr *)uaddr, uaddrlen, \ atype, NULL, NULL); \ __ret; \ }) @@ -248,7 +248,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, (struct sockaddr *)uaddr, uaddrlen, \ atype, t_ctx, NULL); \ release_sock(sk); \ } \ @@ -266,7 +266,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, (struct sockaddr *)uaddr, uaddrlen, \ atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ diff --git a/include/linux/net.h b/include/linux/net.h index 0e316f0631130..db6bc997ca5b0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -166,7 +166,7 @@ struct proto_ops { struct sockaddr_unsized *myaddr, int 
sockaddr_len); int (*connect) (struct socket *sock, - struct sockaddr *vaddr, + struct sockaddr_unsized *vaddr, int sockaddr_len, int flags); int (*socketpair)(struct socket *sock1, struct socket *sock2); @@ -348,7 +348,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, int kernel_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen); int kernel_listen(struct socket *sock, int backlog); int kernel_accept(struct socket *sock, struct socket **newsock, int flags); -int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, +int kernel_connect(struct socket *sock, struct sockaddr_unsized *addr, int addrlen, int flags); int kernel_getsockname(struct socket *sock, struct sockaddr *addr); int kernel_getpeername(struct socket *sock, struct sockaddr *addr); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 1666cf6f539e1..ebafd96912bb1 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -23,11 +23,11 @@ struct sockaddr; struct socket; int inet_release(struct socket *sock); -int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +int inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); -int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +int __inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags, int is_sendmsg); -int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, +int inet_dgram_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bb4b80c12541a..58242b37b47a1 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -85,7 +85,7 @@ void sctp_udp_sock_stop(struct net *net); /* * sctp/socket.c */ -int sctp_inet_connect(struct 
socket *sock, struct sockaddr *uaddr, +int sctp_inet_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); diff --git a/include/net/sock.h b/include/net/sock.h index acbb78c96d695..589fbce77217c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1921,7 +1921,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, * does not implement a particular function. */ int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len); -int sock_no_connect(struct socket *, struct sockaddr *, int, int); +int sock_no_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *); int sock_no_getname(struct socket *, struct sockaddr *, int); diff --git a/include/net/vsock_addr.h b/include/net/vsock_addr.h index cf8cc140d68db..c3f4cc206198f 100644 --- a/include/net/vsock_addr.h +++ b/include/net/vsock_addr.h @@ -16,7 +16,7 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr); void vsock_addr_unbind(struct sockaddr_vm *addr); bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, const struct sockaddr_vm *other); -int vsock_addr_cast(const struct sockaddr *addr, size_t len, +int vsock_addr_cast(const struct sockaddr_unsized *addr, size_t len, struct sockaddr_vm **out_addr); #endif diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index ef517bb307e27..49d674f5e73a8 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1018,7 +1018,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) } err = READ_ONCE(csocket->ops)->connect(csocket, - (struct sockaddr *)&stor, + (struct sockaddr_unsized *)&stor, sizeof(stor), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", @@ -1058,8 +1058,8 @@ 
p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) return err; } - err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); + err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr_unsized *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { pr_err("%s (%d): problem connecting socket: %s: %d\n", __func__, task_pid_nr(current), addr, err); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 45db43cde67f1..2a01fff46c9da 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1204,7 +1204,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int a } /* Set the address we talk to */ -static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, +static int atalk_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 62fdf07c53dea..8f5e76f5dd9e8 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -56,10 +56,10 @@ static int pvc_bind(struct socket *sock, struct sockaddr_unsized *sockaddr, return error; } -static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr, +static int pvc_connect(struct socket *sock, struct sockaddr_unsized *sockaddr, int sockaddr_len, int flags) { - return pvc_bind(sock, (struct sockaddr_unsized *)sockaddr, sockaddr_len); + return pvc_bind(sock, sockaddr, sockaddr_len); } static int pvc_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/svc.c b/net/atm/svc.c index 1906a493c8aa0..005964250ecd2 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -153,7 +153,7 @@ static int svc_bind(struct socket *sock, struct sockaddr_unsized *sockaddr, return error; } -static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, +static int svc_connect(struct socket *sock, struct sockaddr_unsized *sockaddr, int sockaddr_len, int flags) { DEFINE_WAIT(wait); 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 23c558ff96822..7ebbff2f00201 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1175,7 +1175,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int ad * FIXME: nonblock behaviour looks like it may have a bug. */ static int __must_check ax25_connect(struct socket *sock, - struct sockaddr *uaddr, int addr_len, int flags) + struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; ax25_cb *ax25 = sk_to_ax25(sk), *ax25t; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 6a7e1b4a8701b..243505b897331 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1080,7 +1080,7 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, return err; } -static int iso_sock_connect(struct socket *sock, struct sockaddr *addr, +static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ca7394d8fa4e2..9ee189c815d49 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -178,7 +178,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, i return err; } -static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, +static int l2cap_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index d62fd6c576175..57b1dca8141f0 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -808,7 +808,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; - *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); + *err 
= kernel_connect(sock, (struct sockaddr_unsized *)&addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) return s; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8c8762bbc6de3..be6639cd6f590 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -371,7 +371,8 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, return err; } -static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) +static int rfcomm_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, + int alen, int flags) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 01d878205e582..7afe65e7ff371 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -639,7 +639,7 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr_unsized *addr, return err; } -static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) +static int sco_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 039dfbd367c97..af218742af5ac 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -734,7 +734,7 @@ static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov, * o sock->state: holds the SS_* socket state and is updated by connect and * disconnect. 
*/ -static int caif_connect(struct socket *sock, struct sockaddr *uaddr, +static int caif_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; diff --git a/net/can/bcm.c b/net/can/bcm.c index 5e690a2377e48..7eba8ae01a5b1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1657,7 +1657,7 @@ static int bcm_release(struct socket *sock) return 0; } -static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, +static int bcm_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int len, int flags) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index a2abedc757d0f..6272326dd614a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -535,7 +535,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, in return ret; } -static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr, +static int j1939_sk_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int len, int flags) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f8181acaf8704..70b25f4ecba67 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -460,7 +460,7 @@ int ceph_tcp_connect(struct ceph_connection *con) set_sock_callbacks(sock, con); con_sock_state_connecting(con); - ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss), + ret = kernel_connect(sock, (struct sockaddr_unsized *)&ss, sizeof(ss), O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", diff --git a/net/core/sock.c b/net/core/sock.c index 1e1ce18bba161..f97a0e9589914 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3468,7 +3468,7 @@ int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len) } EXPORT_SYMBOL(sock_no_bind); -int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 
+int sock_no_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags) { return -EOPNOTSUPP; diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 99ddfad9bb88f..b93fd85f248a6 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -107,7 +107,7 @@ static int ieee802154_sock_bind(struct socket *sock, struct sockaddr_unsized *ua return sock_no_bind(sock, uaddr, addr_len); } -static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, +static int ieee802154_sock_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; @@ -118,7 +118,7 @@ static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); - return sk->sk_prot->connect(sk, uaddr, addr_len); + return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index aa43d16e48ff1..0844de9ac6a48 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -567,7 +567,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, return err; } -int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, +int inet_dgram_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; @@ -584,14 +584,14 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = prot->pre_connect(sk, uaddr, addr_len); + err = prot->pre_connect(sk, (struct sockaddr *)uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; - return prot->connect(sk, uaddr, addr_len); + return prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } 
EXPORT_SYMBOL(inet_dgram_connect); @@ -623,7 +623,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) * Connect to a remote host. There is regrettably still a little * TCP 'magic' in here. */ -int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +int __inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags, int is_sendmsg) { struct sock *sk = sock->sk; @@ -671,12 +671,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + err = sk->sk_prot->pre_connect(sk, (struct sockaddr *)uaddr, addr_len); if (err) goto out; } - err = sk->sk_prot->connect(sk, uaddr, addr_len); + err = sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); if (err < 0) goto out; @@ -741,7 +741,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(__inet_stream_connect); -int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +int inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { int err; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a9345aa5a2e5f..dee578aad690d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1061,7 +1061,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? 
O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, uaddr, + err = __inet_stream_connect(sk->sk_socket, (struct sockaddr_unsized *)uaddr, msg->msg_namelen, flags, 1); /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 11e5a88c923de..b1f667c52cb21 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -38,7 +38,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->peer_ip; udp_addr.sin_port = cfg->peer_udp_port; - err = kernel_connect(sock, (struct sockaddr *)&udp_addr, + err = kernel_connect(sock, (struct sockaddr_unsized *)&udp_addr, sizeof(udp_addr), 0); if (err < 0) goto error; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b0d9286b33c88..cef3e02107444 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -52,7 +52,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, sizeof(udp6_addr.sin6_addr)); udp6_addr.sin6_port = cfg->peer_udp_port; err = kernel_connect(sock, - (struct sockaddr *)&udp6_addr, + (struct sockaddr_unsized *)&udp6_addr, sizeof(udp6_addr), 0); } if (err < 0) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 3941e32cda690..a4f1df92417d9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -668,7 +668,7 @@ static int iucv_sock_autobind(struct sock *sk) return err; } -static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr) +static int afiucv_path_connect(struct socket *sock, struct sockaddr_unsized *addr) { DECLARE_SOCKADDR(struct sockaddr_iucv *, sa, addr); struct sock *sk = sock->sk; @@ -714,7 +714,7 @@ static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr) } /* Connect an unconnected socket */ -static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, +static int iucv_sock_connect(struct 
socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { DECLARE_SOCKADDR(struct sockaddr_iucv *, sa, addr); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 4b5e372a5cd43..c4f4a57cd67c9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1513,7 +1513,7 @@ static int l2tp_tunnel_sock_create(struct net *net, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = peer_tunnel_id; err = kernel_connect(sock, - (struct sockaddr *)&ip6_addr, + (struct sockaddr_unsized *)&ip6_addr, sizeof(ip6_addr), 0); if (err < 0) goto out; @@ -1538,7 +1538,7 @@ static int l2tp_tunnel_sock_create(struct net *net, ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->peer_ip; ip_addr.l2tp_conn_id = peer_tunnel_id; - err = kernel_connect(sock, (struct sockaddr *)&ip_addr, + err = kernel_connect(sock, (struct sockaddr_unsized *)&ip_addr, sizeof(ip_addr), 0); if (err < 0) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 5e12e7ce17d8a..ae4543d5597b6 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -684,7 +684,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net, /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket */ -static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, +static int pppol2tp_connect(struct socket *sock, struct sockaddr_unsized *uservaddr, int sockaddr_len, int flags) { struct sock *sk = sock->sk; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index e5bb0c0d708cc..59d593bb5d186 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -477,7 +477,7 @@ static int llc_ui_shutdown(struct socket *sock, int how) * This function will autobind if user did not previously call bind. * Returns: 0 upon success, negative otherwise. 
*/ -static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, +static int llc_ui_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addrlen, int flags) { struct sock *sk = sock->sk; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 5b1ef50637b70..209a963112e3a 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -128,7 +128,7 @@ static int mctp_bind(struct socket *sock, struct sockaddr_unsized *addr, int add /* Used to set a specific peer prior to bind. Not used for outbound * connections (Tag Owner set) since MCTP is a datagram protocol. */ -static int mctp_connect(struct socket *sock, struct sockaddr *addr, +static int mctp_connect(struct socket *sock, struct sockaddr_unsized *addr, int addrlen, int flags) { struct sock *sk = sock->sk; diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 953d419027718..35f6be8145674 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -279,7 +279,7 @@ void mctp_test_bind_run(struct kunit *test, addr.smctp_addr.s_addr = setup->peer_addr; /* connect() type must match bind() type */ addr.smctp_type = setup->bind_type; - rc = kernel_connect(*sock, (struct sockaddr *)&addr, + rc = kernel_connect(*sock, (struct sockaddr_unsized *)&addr, sizeof(addr), 0); KUNIT_EXPECT_EQ(test, rc, 0); } @@ -292,5 +292,6 @@ void mctp_test_bind_run(struct kunit *test, addr.smctp_type = setup->bind_type; *ret_bind_errno = - kernel_bind(*sock, (struct sockaddr *)&addr, sizeof(addr)); + kernel_bind(*sock, (struct sockaddr_unsized *)&addr, + sizeof(addr)); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d90237bf433cd..30961b3d17028 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1680,7 +1680,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, sock_hold(ssk); list_add_tail(&subflow->node, &msk->conn_list); - err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); + err = kernel_connect(sf, (struct sockaddr_unsized 
*)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR); pr_debug("msk=%p local=%d remote=%d connect error: %d\n", diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d8c089ef387cf..5a0c6f42bd8f2 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1501,7 +1501,7 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id, } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); - result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, + result = kernel_connect(sock, (struct sockaddr_unsized *)&mcast_addr, salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 18490a56edd02..8e5151f0c6e46 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1054,7 +1054,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr_unsized *addr, return err; } -static int netlink_connect(struct socket *sock, struct sockaddr *addr, +static int netlink_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { int err = 0; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 33468124d53db..5ed1a71ceec1e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -632,8 +632,8 @@ static int nr_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr return 0; } -static int nr_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +static int nr_connect(struct socket *sock, struct sockaddr_unsized *uaddr, + int addr_len, int flags) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 26e6ceb48a82e..f1be1e84f6653 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -648,7 +648,7 @@ static int llcp_sock_release(struct socket *sock) return err; } -static int 
llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, +static int llcp_sock_connect(struct socket *sock, struct sockaddr_unsized *_addr, int len, int flags) { struct sock *sk = sock->sk; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 5125392bb68eb..b049022399aea 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -73,7 +73,7 @@ static int rawsock_release(struct socket *sock) return 0; } -static int rawsock_connect(struct socket *sock, struct sockaddr *_addr, +static int rawsock_connect(struct socket *sock, struct sockaddr_unsized *_addr, int len, int flags) { struct sock *sk = sock->sk; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 478b026477335..9391378083a41 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -214,8 +214,8 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } -static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, - int len, int flags) +static int pn_socket_connect(struct socket *sock, struct sockaddr_unsized *addr, + int len, int flags) { struct sock *sk = sock->sk; struct pn_sock *pn = pn_sk(sk); @@ -252,7 +252,7 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, pn->resource = pn_sockaddr_get_resource(spn); sock->state = SS_CONNECTING; - err = sk->sk_prot->connect(sk, addr, len); + err = sk->sk_prot->connect(sk, (struct sockaddr *)addr, len); if (err) { sock->state = SS_UNCONNECTED; pn->dobject = 0; diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 00bd3dd9f0f95..dab839f61ee93 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -1084,7 +1084,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, return rc; } -static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, +static int qrtr_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c 
index 4a7217fbeab6f..b396c673dfaf6 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -533,7 +533,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname, } -static int rds_connect(struct socket *sock, struct sockaddr *uaddr, +static int rds_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 1eff3b03ab774..92891b0d224d3 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -173,7 +173,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) * own the socket */ rds_tcp_set_callbacks(sock, cp); - ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK); + ret = kernel_connect(sock, (struct sockaddr_unsized *)addr, addrlen, O_NONBLOCK); rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 47369eab5aecb..fd67494f2815e 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -765,7 +765,8 @@ static int rose_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int ad return err; } -static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) +static int rose_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, + int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 245f37a743943..0c2c68c4b07e4 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -481,7 +481,7 @@ EXPORT_SYMBOL(rxrpc_kernel_set_notifications); * - this just targets it at a specific destination; no actual connection * negotiation takes place */ -static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, +static int rxrpc_connect(struct socket *sock, struct sockaddr_unsized *addr, int addr_len, int flags) { struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)addr; diff 
--git a/net/sctp/socket.c b/net/sctp/socket.c index ac737e60829b9..940abbced191d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4820,7 +4820,7 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, return err; } -int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, +int sctp_inet_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { if (addr_len < sizeof(uaddr->sa_family)) @@ -4829,7 +4829,7 @@ int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, if (uaddr->sa_family == AF_UNSPEC) return -EOPNOTSUPP; - return sctp_connect(sock->sk, uaddr, addr_len, flags); + return sctp_connect(sock->sk, (struct sockaddr *)uaddr, addr_len, flags); } /* Only called when shutdown a listening SCTP socket. */ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index be18ab08f15d6..0ef3e16a8517a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1642,7 +1642,7 @@ static void smc_connect_work(struct work_struct *work) release_sock(&smc->sk); } -int smc_connect(struct socket *sock, struct sockaddr *addr, +int smc_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sock *sk = sock->sk; @@ -1694,7 +1694,7 @@ int smc_connect(struct socket *sock, struct sockaddr *addr, rc = -EALREADY; goto out; } - rc = kernel_connect(smc->clcsock, addr, alen, flags); + rc = kernel_connect(smc->clcsock, (struct sockaddr_unsized *)addr, alen, flags); if (rc && rc != -EINPROGRESS) goto out; diff --git a/net/smc/smc.h b/net/smc/smc.h index a008dbe6d6f63..9e6af72784baa 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -44,7 +44,7 @@ void smc_release_cb(struct sock *sk); int smc_release(struct socket *sock); int smc_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); -int smc_connect(struct socket *sock, struct sockaddr *addr, +int smc_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags); int smc_accept(struct socket *sock, struct socket 
*new_sock, struct proto_accept_arg *arg); diff --git a/net/socket.c b/net/socket.c index aaefb2e519a74..101a7ed574e75 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2099,8 +2099,8 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address, if (err) goto out; - err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address, - addrlen, sock->file->f_flags | file_flags); + err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr_unsized *)address, + addrlen, sock->file->f_flags | file_flags); out: return err; } @@ -3662,14 +3662,14 @@ EXPORT_SYMBOL(kernel_accept); * Returns 0 or an error code. */ -int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, +int kernel_connect(struct socket *sock, struct sockaddr_unsized *addr, int addrlen, int flags) { struct sockaddr_storage address; memcpy(&address, addr, addrlen); - return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address, + return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr_unsized *)&address, addrlen, flags); } EXPORT_SYMBOL(kernel_connect); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 318ee24ad900a..58442ae1c2da2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1474,7 +1474,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, goto out_release; } - err = kernel_connect(sock, sap, salen, 0); + err = kernel_connect(sock, (struct sockaddr_unsized *)sap, salen, 0); if (err < 0) { dprintk("RPC: can't connect UDP socket (%d)\n", err); goto out_release; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 95732a45b059b..2e1fe60133615 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2005,7 +2005,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, xs_stream_start_connect(transport); - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); + return kernel_connect(sock, (struct sockaddr_unsized *)xs_addr(xprt), xprt->addrlen, 0); } /** @@ -2405,7 +2405,8 @@ 
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* Tell the socket layer to start connecting... */ set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + return kernel_connect(sock, (struct sockaddr_unsized *)xs_addr(xprt), + xprt->addrlen, O_NONBLOCK); } /** diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3903a97ada7d4..817b07d95a914 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2565,7 +2565,7 @@ static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) * * Return: 0 on success, errno otherwise */ -static int tipc_connect(struct socket *sock, struct sockaddr *dest, +static int tipc_connect(struct socket *sock, struct sockaddr_unsized *dest, int destlen, int flags) { struct sock *sk = sock->sk; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 788775f0eea72..3b44cadaed96b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -844,7 +844,7 @@ static int unix_listen(struct socket *sock, int backlog) static int unix_release(struct socket *); static int unix_bind(struct socket *, struct sockaddr_unsized *, int); -static int unix_stream_connect(struct socket *, struct sockaddr *, +static int unix_stream_connect(struct socket *, struct sockaddr_unsized *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg); @@ -866,7 +866,7 @@ static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); -static int unix_dgram_connect(struct socket *, struct sockaddr *, +static int unix_dgram_connect(struct socket *, struct sockaddr_unsized *, int, int); static int 
unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, @@ -1512,7 +1512,7 @@ static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) unix_state_unlock(sk2); } -static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, +static int unix_dgram_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; @@ -1631,7 +1631,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) return timeo; } -static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, +static int unix_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 0e5609e7284bb..72bb6b7ed386b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -995,7 +995,7 @@ vsock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) sk = sock->sk; - if (vsock_addr_cast((struct sockaddr *)addr, addr_len, &vm_addr) != 0) + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) return -EINVAL; lock_sock(sk); @@ -1328,7 +1328,7 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, } static int vsock_dgram_connect(struct socket *sock, - struct sockaddr *addr, int addr_len, int flags) + struct sockaddr_unsized *addr, int addr_len, int flags) { int err; struct sock *sk; @@ -1528,7 +1528,7 @@ static void vsock_connect_timeout(struct work_struct *work) sock_put(sk); } -static int vsock_connect(struct socket *sock, struct sockaddr *addr, +static int vsock_connect(struct socket *sock, struct sockaddr_unsized *addr, int addr_len, int flags) { int err; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c index 223b9660a759f..a986aa6fff9bb 100644 --- a/net/vmw_vsock/vsock_addr.c 
+++ b/net/vmw_vsock/vsock_addr.c @@ -57,7 +57,7 @@ bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, } EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); -int vsock_addr_cast(const struct sockaddr *addr, +int vsock_addr_cast(const struct sockaddr_unsized *addr, size_t len, struct sockaddr_vm **out_addr) { if (len < sizeof(**out_addr)) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ca8006d8f7929..af8762b24039d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -743,7 +743,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) return rc; } -static int x25_connect(struct socket *sock, struct sockaddr *uaddr, +static int x25_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c index b27d861f354f5..d1814582319bc 100644 --- a/samples/qmi/qmi_sample_client.c +++ b/samples/qmi/qmi_sample_client.c @@ -468,7 +468,7 @@ static int qmi_sample_probe(struct platform_device *pdev) return ret; sq = dev_get_platdata(&pdev->dev); - ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq, + ret = kernel_connect(sample->qmi.sock, (struct sockaddr_unsized *)sq, sizeof(*sq), 0); if (ret < 0) { pr_err("failed to connect to remote service port\n"); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 0497b5dea25c0..8eeebaa951f03 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -900,7 +900,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args) goto out; } - err = kernel_connect(sock, (struct sockaddr *)&args->addr, + err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen, 0); out: mutex_unlock(&sock_lock); From 3d39d34146f2b38127eadf36a0513e130eaa7eec Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 
16:26:12 -0800 Subject: [PATCH 575/867] net: Remove struct sockaddr from net.h Now that struct sockaddr is no longer used by net.h, remove it. Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-4-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/net.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index db6bc997ca5b0..f58b38ab37f8a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -148,7 +148,6 @@ typedef struct { struct vm_area_struct; struct page; -struct sockaddr; struct msghdr; struct module; struct sk_buff; From 449f68f8fffa2c41fc265730bd05a3c4947916c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 16:26:13 -0800 Subject: [PATCH 576/867] net: Convert proto callbacks from sockaddr to sockaddr_unsized Convert struct proto pre_connect(), connect(), bind(), and bind_add() callback function prototypes from struct sockaddr to struct sockaddr_unsized. This does not change per-implementation use of sockaddr for passing around an arbitrarily sized sockaddr struct. Those will be addressed in future patches. Additionally removes the no longer referenced struct sockaddr from include/net/inet_common.h. No binary changes expected. 
Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-5-kees@kernel.org Signed-off-by: Jakub Kicinski --- fs/dlm/lowcomms.c | 4 ++-- include/net/inet_common.h | 5 ++--- include/net/ip.h | 4 ++-- include/net/ipv6.h | 8 ++++---- include/net/ipv6_stubs.h | 2 +- include/net/ping.h | 2 +- include/net/sock.h | 10 +++++----- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- net/core/filter.c | 5 +++-- net/core/sock.c | 2 +- net/ieee802154/socket.c | 12 ++++++------ net/ipv4/af_inet.c | 14 +++++++------- net/ipv4/datagram.c | 4 ++-- net/ipv4/ping.c | 8 ++++---- net/ipv4/raw.c | 3 ++- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 6 ++++-- net/ipv6/af_inet6.c | 6 +++--- net/ipv6/datagram.c | 8 ++++---- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 5 +++-- net/l2tp/l2tp_ip.c | 6 ++++-- net/l2tp/l2tp_ip6.c | 5 +++-- net/mptcp/pm_kernel.c | 4 ++-- net/mptcp/protocol.c | 7 ++++--- net/phonet/pep.c | 3 ++- net/phonet/socket.c | 4 ++-- net/sctp/socket.c | 9 +++++---- 31 files changed, 88 insertions(+), 77 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index f832dafdaca82..b3958008ba3f7 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1126,7 +1126,7 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed) static int sctp_bind_addrs(struct socket *sock, __be16 port) { struct sockaddr_storage localaddr; - struct sockaddr *addr = (struct sockaddr *)&localaddr; + struct sockaddr_unsized *addr = (struct sockaddr_unsized *)&localaddr; int i, addr_len, result = 0; for (i = 0; i < dlm_local_count; i++) { @@ -1134,7 +1134,7 @@ static int sctp_bind_addrs(struct socket *sock, __be16 port) make_sockaddr(&localaddr, port, &addr_len); if (!i) - result = kernel_bind(sock, (struct sockaddr_unsized *)addr, addr_len); + result = kernel_bind(sock, addr, addr_len); else result = sock_bind_add(sock->sk, addr, addr_len); diff --git a/include/net/inet_common.h 
b/include/net/inet_common.h index ebafd96912bb1..5dd2bf24449ef 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -19,7 +19,6 @@ struct msghdr; struct net; struct page; struct sock; -struct sockaddr; struct socket; int inet_release(struct socket *sock); @@ -43,7 +42,7 @@ int inet_listen(struct socket *sock, int backlog); int __inet_listen_sk(struct sock *sk, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); -int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int inet_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); /* Don't allocate port at this moment, defer to connect. */ #define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) /* Grab and release socket lock. */ @@ -52,7 +51,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); #define BIND_FROM_BPF (1 << 2) /* Skip CAP_NET_BIND_SERVICE check. */ #define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3) -int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, +int __inet_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); diff --git a/include/net/ip.h b/include/net/ip.h index 380afb691c419..69d5cef460040 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -261,8 +261,8 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, } /* datagram.c */ -int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int __ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); +int ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2188bad9a687d..74fbf1ad8065a 100644 --- 
a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1188,10 +1188,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, +int __ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); -int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, +int ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); void ip6_datagram_release_cb(struct sock *sk); @@ -1209,7 +1209,7 @@ void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); -int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 8a3465c8c2c5c..d3013e721b144 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -80,7 +80,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; /* A stub used by bpf helpers. 
Similarly ugly as ipv6_stub */ struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags); struct sock *(*udp6_lib_lookup)(const struct net *net, const struct in6_addr *saddr, __be16 sport, diff --git a/include/net/ping.h b/include/net/ping.h index 9634b8800814d..05bfd594a64c0 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -58,7 +58,7 @@ void ping_unhash(struct sock *sk); int ping_init_sock(struct sock *sk); void ping_close(struct sock *sk, long timeout); -int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int ping_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); diff --git a/include/net/sock.h b/include/net/sock.h index 589fbce77217c..a5f36ea9d46f0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1274,10 +1274,10 @@ struct proto { void (*close)(struct sock *sk, long timeout); int (*pre_connect)(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr_unsized *uaddr, int addr_len); int (*connect)(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr_unsized *uaddr, int addr_len); int (*disconnect)(struct sock *sk, int flags); @@ -1306,9 +1306,9 @@ struct proto { size_t len, int flags, int *addr_len); void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, - struct sockaddr *addr, int addr_len); + struct sockaddr_unsized *addr, int addr_len); int (*bind_add)(struct sock *sk, - struct sockaddr *addr, int addr_len); + struct sockaddr_unsized *addr, int addr_len); int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); @@ -3105,7 +3105,7 @@ void sock_set_reuseaddr(struct sock *sk); void sock_set_reuseport(struct sock *sk); void sock_set_sndtimeo(struct sock *sk, s64 secs); -int sock_bind_add(struct 
sock *sk, struct sockaddr *addr, int addr_len); +int sock_bind_add(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); int sock_get_timeout(long timeo, void *optval, bool old_timeval); int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fd6d8d1230d0..0aa1f07d036a6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -535,7 +535,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req_unhash, bool *own_req); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int tcp_connect(struct sock *sk); enum tcp_synack_type { TCP_SYNACK_NORMAL, diff --git a/include/net/udp.h b/include/net/udp.h index cffedb3e40f24..a061d1b22ddc2 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -424,7 +424,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); int udp_init_sock(struct sock *sk); -int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); diff --git a/net/core/filter.c b/net/core/filter.c index 16105f52927da..90273da748073 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5978,7 +5978,7 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, return err; if (((struct sockaddr_in *)addr)->sin_port == htons(0)) flags |= BIND_FORCE_ADDRESS_NO_PORT; - return __inet_bind(sk, addr, addr_len, flags); + return __inet_bind(sk, (struct sockaddr_unsized *)addr, addr_len, flags); #if 
IS_ENABLED(CONFIG_IPV6) } else if (addr->sa_family == AF_INET6) { if (addr_len < SIN6_LEN_RFC2133) @@ -5988,7 +5988,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, /* ipv6_bpf_stub cannot be NULL, since it's called from * bpf_cgroup_inet6_connect hook and ipv6 is already loaded */ - return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags); + return ipv6_bpf_stub->inet6_bind(sk, (struct sockaddr_unsized *)addr, + addr_len, flags); #endif /* CONFIG_IPV6 */ } #endif /* CONFIG_INET */ diff --git a/net/core/sock.c b/net/core/sock.c index f97a0e9589914..3b74fc71f51c1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -4395,7 +4395,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ -int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) +int sock_bind_add(struct sock *sk, struct sockaddr_unsized *addr, int addr_len) { if (!sk->sk_prot->bind_add) return -EOPNOTSUPP; diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b93fd85f248a6..e542fbe113e7b 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -102,7 +102,7 @@ static int ieee802154_sock_bind(struct socket *sock, struct sockaddr_unsized *ua struct sock *sk = sock->sk; if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, (struct sockaddr *)uaddr, addr_len); + return sk->sk_prot->bind(sk, uaddr, addr_len); return sock_no_bind(sock, uaddr, addr_len); } @@ -118,7 +118,7 @@ static int ieee802154_sock_connect(struct socket *sock, struct sockaddr_unsized if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); - return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); + return sk->sk_prot->connect(sk, uaddr, addr_len); } static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, @@ -193,7 +193,7 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } -static int raw_bind(struct 
sock *sk, struct sockaddr *_uaddr, int len) +static int raw_bind(struct sock *sk, struct sockaddr_unsized *_uaddr, int len) { struct ieee802154_addr addr; struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; @@ -227,7 +227,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) return err; } -static int raw_connect(struct sock *sk, struct sockaddr *uaddr, +static int raw_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { return -ENOTSUPP; @@ -485,7 +485,7 @@ static void dgram_close(struct sock *sk, long timeout) sk_common_release(sk); } -static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +static int dgram_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; struct ieee802154_addr haddr; @@ -563,7 +563,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, int *karg) } /* FIXME: autobind */ -static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, +static int dgram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0844de9ac6a48..d5ac089356ebc 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -441,7 +441,7 @@ int inet_release(struct socket *sock) } EXPORT_SYMBOL(inet_release); -int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int inet_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; int err; @@ -466,11 +466,11 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) int inet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { - return inet_bind_sk(sock->sk, (struct sockaddr *)uaddr, addr_len); + return inet_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_bind); -int __inet_bind(struct sock *sk, struct sockaddr 
*uaddr, int addr_len, +int __inet_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; @@ -584,14 +584,14 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr_unsized *uaddr, return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = prot->pre_connect(sk, (struct sockaddr *)uaddr, addr_len); + err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; - return prot->connect(sk, (struct sockaddr *)uaddr, addr_len); + return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); @@ -671,12 +671,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, goto out; if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = sk->sk_prot->pre_connect(sk, (struct sockaddr *)uaddr, addr_len); + err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); if (err) goto out; } - err = sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); + err = sk->sk_prot->connect(sk, uaddr, addr_len); if (err < 0) goto out; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index c2b2cda1a7e50..1614593b6d727 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -16,7 +16,7 @@ #include #include -int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -84,7 +84,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } EXPORT_SYMBOL(__ip4_datagram_connect); -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { int res; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 
5321c5801c64d..ad56588107cc8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -286,7 +286,7 @@ void ping_close(struct sock *sk, long timeout) } EXPORT_IPV6_MOD_GPL(ping_close); -static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, +static int ping_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { /* This check is replicated from __ip4_datagram_connect() and @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, - struct sockaddr *uaddr, int addr_len) + struct sockaddr_unsized *uaddr, int addr_len) { struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { @@ -387,7 +387,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return 0; } -static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +static void ping_set_saddr(struct sock *sk, struct sockaddr_unsized *saddr) { if (saddr->sa_family == AF_INET) { struct inet_sock *isk = inet_sk(sk); @@ -407,7 +407,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) * Moreover, we don't allow binding to multi- and broadcast addresses. */ -int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct inet_sock *isk = inet_sk(sk); unsigned short snum; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d54ebb7df966d..5998c4cc6f47b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,8 @@ static void raw_destroy(struct sock *sk) } /* This gets rid of all the nasties in af_inet. 
-DaveM */ -static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int raw_bind(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 40a76da5364a1..b7526a7888cbe 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -205,7 +205,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) } EXPORT_IPV6_MOD_GPL(tcp_twsk_unique); -static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, +static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { /* This check is replicated from tcp_v4_connect() and intended to @@ -221,7 +221,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, } /* This will initiate an outgoing connection. */ -int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_timewait_death_row *tcp_death_row; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 30dfbf73729da..ffe074cb58658 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2159,7 +2159,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, goto try_again; } -int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { /* This check is replicated from __ip4_datagram_connect() and * intended to prevent BPF program called below from accessing bytes @@ -2172,7 +2173,8 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } EXPORT_IPV6_MOD(udp_pre_connect); -static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int udp_connect(struct sock *sk, struct sockaddr_unsized 
*uaddr, + int addr_len) { int res; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c92d27e35fbcc..b705751eb73c6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -277,7 +277,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, goto out; } -static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, +static int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; @@ -438,7 +438,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, goto out; } -int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; const struct proto *prot; @@ -467,7 +467,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { - return inet6_bind_sk(sock->sk, (struct sockaddr *)uaddr, addr_len); + return inet6_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet6_bind); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 33ebe93d80e3c..83e03176819ce 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -138,7 +138,7 @@ void ip6_datagram_release_cb(struct sock *sk) } EXPORT_SYMBOL_GPL(ip6_datagram_release_cb); -int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, +int __ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; @@ -194,7 +194,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; err = __ip4_datagram_connect(sk, - (struct sockaddr *) &sin, + (struct sockaddr_unsized *)&sin, sizeof(sin)); ipv4_connected: @@ -271,7 +271,7 @@ int __ip6_datagram_connect(struct sock *sk, 
struct sockaddr *uaddr, } EXPORT_SYMBOL_GPL(__ip6_datagram_connect); -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { int res; @@ -282,7 +282,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } EXPORT_SYMBOL_GPL(ip6_datagram_connect); -int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d7a2cdaa26312..e4afc651731a3 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -45,7 +45,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, +static int ping_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { /* This check is replicated from __ip6_datagram_connect() and diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e369f54844dd9..b4cd05dba9b6d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -214,7 +214,8 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) } /* This cleans up af_inet6 a bit. 
-DaveM */ -static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int rawv6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 06eb90e4078e5..7df21c1cba213 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -118,7 +118,7 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) ipv6_hdr(skb)->saddr.s6_addr32); } -static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, +static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { /* This check is replicated from tcp_v6_connect() and intended to @@ -133,7 +133,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } -static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, +static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; @@ -238,7 +238,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif - err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); + err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 813a2ba75824d..794c13674e8ab 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1282,7 +1282,7 @@ static void udp_v6_flush_pending_frames(struct sock *sk) } } -static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, +static int udpv6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { if (addr_len < offsetofend(struct sockaddr, sa_family)) @@ -1303,7 +1303,8 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr 
*uaddr, return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } -static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int udpv6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { int res; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 29795d2839e8b..cac1ff59cb83f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -267,7 +267,8 @@ static void l2tp_ip_destroy_sock(struct sock *sk) } } -static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int l2tp_ip_bind(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr; @@ -328,7 +329,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) return ret; } -static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int l2tp_ip_connect(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr; struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk)); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ea232f338dcb6..05a396ba6a3eb 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -280,7 +280,8 @@ static void l2tp_ip6_destroy_sock(struct sock *sk) } } -static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int l2tp_ip6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -383,7 +384,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) return err; } -static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, +static int l2tp_ip6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr; diff --git 
a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index e50721c670d00..598f01a573c1b 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -867,10 +867,10 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, addrlen = sizeof(struct sockaddr_in6); #endif if (ssk->sk_family == AF_INET) - err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); + err = inet_bind_sk(ssk, (struct sockaddr_unsized *)&addr, addrlen); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (ssk->sk_family == AF_INET6) - err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); + err = inet6_bind_sk(ssk, (struct sockaddr_unsized *)&addr, addrlen); #endif if (err) return err; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 53e2b095dfb12..4cd5df01446e3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3746,7 +3746,8 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int mptcp_connect(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); @@ -3870,10 +3871,10 @@ static int mptcp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int a } if (sk->sk_family == AF_INET) - err = inet_bind_sk(ssk, (struct sockaddr *)uaddr, addr_len); + err = inet_bind_sk(ssk, uaddr, addr_len); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (sk->sk_family == AF_INET6) - err = inet6_bind_sk(ssk, (struct sockaddr *)uaddr, addr_len); + err = inet6_bind_sk(ssk, uaddr, addr_len); #endif if (!err) mptcp_copy_inaddrs(sk, ssk); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4db564d9d522b..120e711ea78cb 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -882,7 +882,8 @@ static struct sock *pep_sock_accept(struct sock *sk, return newsk; } -static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) +static int pep_sock_connect(struct sock *sk, struct 
sockaddr_unsized *addr, + int len) { struct pep_sock *pn = pep_sk(sk); int err; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 9391378083a41..4423d483c630a 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -163,7 +163,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr_unsized *addr, in u8 saddr; if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, (struct sockaddr *)addr, len); + return sk->sk_prot->bind(sk, addr, len); if (len < sizeof(struct sockaddr_pn)) return -EINVAL; @@ -252,7 +252,7 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr_unsized *addr, pn->resource = pn_sockaddr_get_resource(spn); sock->state = SS_CONNECTING; - err = sk->sk_prot->connect(sk, (struct sockaddr *)addr, len); + err = sk->sk_prot->connect(sk, addr, len); if (err) { sock->state = SS_UNCONNECTED; pn->dobject = 0; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 940abbced191d..38d2932acebfc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -306,7 +306,8 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. 
*/ -static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) +static int sctp_bind(struct sock *sk, struct sockaddr_unsized *addr, + int addr_len) { int retval = 0; @@ -1053,13 +1054,13 @@ static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *addrs, } } -static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs, - int addrlen) +static int sctp_bind_add(struct sock *sk, struct sockaddr_unsized *addrs, + int addrlen) { int err; lock_sock(sk); - err = sctp_setsockopt_bindx(sk, addrs, addrlen, SCTP_BINDX_ADD_ADDR); + err = sctp_setsockopt_bindx(sk, (struct sockaddr *)addrs, addrlen, SCTP_BINDX_ADD_ADDR); release_sock(sk); return err; } From 8116d803e7f8f20bf00ce23ff8bd0baab41e1635 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 16:26:14 -0800 Subject: [PATCH 577/867] bpf: Convert cgroup sockaddr filters to use sockaddr_unsized consistently Update BPF cgroup sockaddr filtering infrastructure to use sockaddr_unsized consistently throughout the call chain, removing redundant explicit casts from callers. No binary changes expected. 
Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-6-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/bpf-cgroup.h | 17 ++++++++++------- kernel/bpf/cgroup.c | 4 ++-- net/ipv4/af_inet.c | 4 ++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a7fb4f46974f2..d1eb5c7729cb8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -120,7 +120,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr_unsized *uaddr, int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, @@ -238,8 +238,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, (struct sockaddr *)uaddr, uaddrlen, \ - atype, NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, \ + (struct sockaddr_unsized *)uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) @@ -248,8 +249,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, (struct sockaddr *)uaddr, uaddrlen, \ - atype, t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, \ + (struct sockaddr_unsized *)uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -266,8 +268,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, (struct sockaddr *)uaddr, uaddrlen, \ - atype, NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, \ + (struct sockaddr_unsized *)uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & 
BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 248f517d66d04..497aedc9afa1c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1665,7 +1665,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * returned value != 1 during execution. In all other cases, 0 is returned. */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr_unsized *uaddr, int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, @@ -1673,7 +1673,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, { struct bpf_sock_addr_kern ctx = { .sk = sk, - .uaddr = uaddr, + .uaddr = (struct sockaddr *)uaddr, .t_ctx = t_ctx, }; struct sockaddr_storage unspec; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d5ac089356ebc..a31b94ce8968a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -834,7 +834,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, + BPF_CGROUP_RUN_SA_PROG(sk, sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -842,7 +842,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, + BPF_CGROUP_RUN_SA_PROG(sk, sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); From c1a799eef62b8c3298a4d82753fe0f2a448e5e4f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 16:26:15 -0800 Subject: [PATCH 578/867] bpf: Convert bpf_sock_addr_kern "uaddr" to sockaddr_unsized Change struct bpf_sock_addr_kern to use sockaddr_unsized for the "uaddr" field instead of sockaddr. This improves type safety in the BPF cgroup socket address filtering code. 
The casting in __cgroup_bpf_run_filter_sock_addr() is updated to match the new type, removing an unnecessary cast in the initialization and updating the conditional assignment to use the appropriate sockaddr_unsized cast. Additionally rename the "unspec" variable to "storage" to better align with its usage. No binary changes expected. Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-7-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 2 +- kernel/bpf/cgroup.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index f5c859b8131a3..e116de7edc587 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1515,7 +1515,7 @@ static inline int bpf_tell_extensions(void) struct bpf_sock_addr_kern { struct sock *sk; - struct sockaddr *uaddr; + struct sockaddr_unsized *uaddr; /* Temporary "register" to make indirect stores to nested structures * defined above. We need three registers to make such a store, but * only two (src and dst) are available at convert_ctx_access time diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 497aedc9afa1c..69988af44b378 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1673,10 +1673,10 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, { struct bpf_sock_addr_kern ctx = { .sk = sk, - .uaddr = (struct sockaddr *)uaddr, + .uaddr = uaddr, .t_ctx = t_ctx, }; - struct sockaddr_storage unspec; + struct sockaddr_storage storage; struct cgroup *cgrp; int ret; @@ -1688,8 +1688,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, return 0; if (!ctx.uaddr) { - memset(&unspec, 0, sizeof(unspec)); - ctx.uaddr = (struct sockaddr *)&unspec; + memset(&storage, 0, sizeof(storage)); + ctx.uaddr = (struct sockaddr_unsized *)&storage; ctx.uaddrlen = 0; } else { ctx.uaddrlen = *uaddrlen; From 2b5e9f9b7e414c5eeb20dd7a7b80816ff55cf57b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Nov 2025 
16:26:16 -0800 Subject: [PATCH 579/867] net: Convert struct sockaddr to fixed-size "sa_data[14]" Revert struct sockaddr from flexible array to fixed 14-byte "sa_data", to solve over 36,000 -Wflex-array-member-not-at-end warnings, since struct sockaddr is embedded within many network structs. With socket/proto sockaddr-based internal APIs switched to use struct sockaddr_unsized, there should be no more uses of struct sockaddr that depend on reading beyond the end of struct sockaddr::sa_data that might trigger bounds checking. Comparing an x86_64 "allyesconfig" vmlinux build before and after this patch showed no new "ud1" instructions from CONFIG_UBSAN_BOUNDS nor any new "field-spanning" memcpy CONFIG_FORTIFY_SOURCE instrumentations. Cc: Gustavo A. R. Silva Signed-off-by: Kees Cook Link: https://patch.msgid.link/20251104002617.2752303-8-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 6 ++---- net/core/dev.c | 2 +- net/core/dev_ioctl.c | 2 +- net/ipv4/arp.c | 2 +- net/packet/af_packet.c | 10 +++++----- tools/perf/trace/beauty/include/linux/socket.h | 5 +---- 6 files changed, 11 insertions(+), 16 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 7b1a01be29da8..944027f9765e7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -32,12 +32,10 @@ typedef __kernel_sa_family_t sa_family_t; * 1003.1g requires sa_family_t and that sa_data is char. */ +/* Deprecated for in-kernel use. Use struct sockaddr_unsized instead. 
*/ struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - union { - char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ - DECLARE_FLEX_ARRAY(char, sa_data); - }; + char sa_data[14]; /* 14 bytes of protocol address */ }; /** diff --git a/net/core/dev.c b/net/core/dev.c index ba39146bbd25f..537aa43edff0e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9973,7 +9973,7 @@ DECLARE_RWSEM(dev_addr_sem); /* "sa" is a true struct sockaddr with limited "sa_data" member. */ int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data_min); + size_t size = sizeof(sa->sa_data); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index ad54b12d4b4c8..b3ce0fb24a69b 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -596,7 +596,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data_min), + min(sizeof(ifr->ifr_hwaddr.sa_data), (size_t)dev->addr_len)); netdev_lock_ops(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f3bfecf8a2341..7f3863daaa407 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1189,7 +1189,7 @@ static int arp_req_get(struct net *net, struct arpreq *r) read_lock_bh(&neigh->lock); memcpy(r->arp_ha.sa_data, neigh->ha, - min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); + min(dev->addr_len, sizeof(r->arp_ha.sa_data))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fccad2a529cc1..494d628d10a51 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3284,7 +3284,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr_unsized *uaddr, { struct sock *sk = sock->sk; struct sockaddr *sa 
= (struct sockaddr *)uaddr; - char name[sizeof(sa->sa_data_min) + 1]; + char name[sizeof(sa->sa_data) + 1]; /* * Check legality @@ -3295,8 +3295,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr_unsized *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, sa->sa_data, sizeof(sa->sa_data_min)); - name[sizeof(sa->sa_data_min)] = 0; + memcpy(name, sa->sa_data, sizeof(sa->sa_data)); + name[sizeof(sa->sa_data)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3581,11 +3581,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); return sizeof(*uaddr); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 3b262487ec060..77d7c59f5d8b1 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -34,10 +34,7 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - union { - char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ - DECLARE_FLEX_ARRAY(char, sa_data); - }; + char sa_data[14]; /* 14 bytes of protocol address */ }; struct linger { From 90a88306eb874fe4bbdd860e6c9787f5bbc588b5 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 3 Nov 2025 10:28:11 -0600 Subject: [PATCH 580/867] net: ethernet: ti: netcp: Standardize knav_dma_open_channel to return NULL on error Make knav_dma_open_channel consistently return NULL on error instead of ERR_PTR. 
Currently the header include/linux/soc/ti/knav_dma.h returns NULL when the driver is disabled, but the driver implementation does not even return NULL or ERR_PTR on failure, causing inconsistency in the users. This results in a crash in netcp_free_navigator_resources as follows (trimmed): Unhandled fault: alignment exception (0x221) at 0xfffffff2 [fffffff2] *pgd=80000800207003, *pmd=82ffda003, *pte=00000000 Internal error: : 221 [#1] SMP ARM Modules linked in: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-rc7 #1 NONE Hardware name: Keystone PC is at knav_dma_close_channel+0x30/0x19c LR is at netcp_free_navigator_resources+0x2c/0x28c [... TRIM...] Call trace: knav_dma_close_channel from netcp_free_navigator_resources+0x2c/0x28c netcp_free_navigator_resources from netcp_ndo_open+0x430/0x46c netcp_ndo_open from __dev_open+0x114/0x29c __dev_open from __dev_change_flags+0x190/0x208 __dev_change_flags from netif_change_flags+0x1c/0x58 netif_change_flags from dev_change_flags+0x38/0xa0 dev_change_flags from ip_auto_config+0x2c4/0x11f0 ip_auto_config from do_one_initcall+0x58/0x200 do_one_initcall from kernel_init_freeable+0x1cc/0x238 kernel_init_freeable from kernel_init+0x1c/0x12c kernel_init from ret_from_fork+0x14/0x38 [... TRIM...] Standardize the error handling by making the function return NULL on all error conditions. The API is used only in netcp_core.c, so the impact is limited. Note that this change in effect reverts commit 5b6cb43b4d62 ("net: ethernet: ti: netcp_core: return error while dma channel open issue"), but provides a less error-prone implementation.
Suggested-by: Simon Horman Suggested-by: Jacob Keller Signed-off-by: Nishanth Menon Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251103162811.3730055-1-nm@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/netcp_core.c | 10 +++++----- drivers/soc/ti/knav_dma.c | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 857820657bac5..5ee13db568f08 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1338,10 +1338,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_channel = knav_dma_open_channel(dev, tx_pipe->dma_chan_name, &config); - if (IS_ERR(tx_pipe->dma_channel)) { + if (!tx_pipe->dma_channel) { dev_err(dev, "failed opening tx chan(%s)\n", tx_pipe->dma_chan_name); - ret = PTR_ERR(tx_pipe->dma_channel); + ret = -EINVAL; goto err; } @@ -1359,7 +1359,7 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) return 0; err: - if (!IS_ERR_OR_NULL(tx_pipe->dma_channel)) + if (tx_pipe->dma_channel) knav_dma_close_channel(tx_pipe->dma_channel); tx_pipe->dma_channel = NULL; return ret; @@ -1678,10 +1678,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev) netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device, netcp->dma_chan_name, &config); - if (IS_ERR(netcp->rx_channel)) { + if (!netcp->rx_channel) { dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n", netcp->dma_chan_name); - ret = PTR_ERR(netcp->rx_channel); + ret = -EINVAL; goto fail; } diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index a25ebe6cd5030..553ae7ee20f16 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -402,7 +402,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name, * @name: slave channel name * @config: dma configuration parameters * - * Returns pointer to appropriate DMA channel on success or 
error. + * Return: Pointer to appropriate DMA channel on success or NULL on error. */ void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -414,13 +414,13 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!kdev) { pr_err("keystone-navigator-dma driver not registered\n"); - return (void *)-EINVAL; + return NULL; } chan_num = of_channel_match_helper(dev->of_node, name, &instance); if (chan_num < 0) { dev_err(kdev->dev, "No DMA instance with name %s\n", name); - return (void *)-EINVAL; + return NULL; } dev_dbg(kdev->dev, "initializing %s channel %d from DMA %s\n", @@ -431,7 +431,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (config->direction != DMA_MEM_TO_DEV && config->direction != DMA_DEV_TO_MEM) { dev_err(kdev->dev, "bad direction\n"); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma instance */ @@ -443,7 +443,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, } if (!dma) { dev_err(kdev->dev, "No DMA instance with name %s\n", instance); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma channel from dma instance */ @@ -463,14 +463,14 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!chan) { dev_err(kdev->dev, "channel %d is not in DMA %s\n", chan_num, instance); - return (void *)-EINVAL; + return NULL; } if (atomic_read(&chan->ref_count) >= 1) { if (!check_config(chan, config)) { dev_err(kdev->dev, "channel %d config miss-match\n", chan_num); - return (void *)-EINVAL; + return NULL; } } From 327c20c21d80e0d87834b392d83ae73c955ad8ff Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 3 Nov 2025 08:38:17 -0800 Subject: [PATCH 581/867] netpoll: Fix deadlock in memory allocation under spinlock Fix a AA deadlock in refill_skbs() where memory allocation while holding skb_pool->lock can trigger a recursive lock acquisition attempt. 
The deadlock scenario occurs when the system is under severe memory pressure: 1. refill_skbs() acquires skb_pool->lock (spinlock) 2. alloc_skb() is called while holding the lock 3. Memory allocator fails and calls slab_out_of_memory() 4. This triggers printk() for the OOM warning 5. The console output path calls netpoll_send_udp() 6. netpoll_send_udp() attempts to acquire the same skb_pool->lock 7. Deadlock: the lock is already held by the same CPU Call stack: refill_skbs() spin_lock_irqsave(&skb_pool->lock) <- lock acquired __alloc_skb() kmem_cache_alloc_node_noprof() slab_out_of_memory() printk() console_flush_all() netpoll_send_udp() skb_dequeue() spin_lock_irqsave(&skb_pool->lock) <- deadlock attempt This bug was exposed by commit 248f6571fd4c51 ("netpoll: Optimize skb refilling on critical path") which removed refill_skbs() from the critical path (where nested printk was being deferred), letting nested printk be called from inside refill_skbs(). Refactor refill_skbs() to never allocate memory while holding the spinlock. Another possible solution to fix this problem is protecting refill_skbs() from nested printks, basically calling printk_deferred_{enter,exit}() in refill_skbs(), so that any nested pr_warn() would be deferred. I prefer the approach taken in this patch, given I _think_ it might be a good idea to move the alloc_skb() from GFP_ATOMIC to GFP_KERNEL in the future, so having the alloc_skb() outside of the lock will be a necessary step. There is a possible TOCTOU issue between checking the pool length and queueing the newly allocated skb, but this is not a problem, given that an extra SKB in the pool is harmless and will eventually be used. 
Signed-off-by: Breno Leitao Fixes: 248f6571fd4c51 ("netpoll: Optimize skb refilling on critical path") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251103-fix_netpoll_aa-v4-1-4cfecdf6da7c@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 60a05d3b7c249..c85f740065fc6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,19 +228,16 @@ static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; - unsigned long flags; skb_pool = &np->skb_pool; - spin_lock_irqsave(&skb_pool->lock, flags); - while (skb_pool->qlen < MAX_SKBS) { + while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(skb_pool, skb); + skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) From c74619e7602e88a0239cd4999571dd31081e9adf Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 3 Nov 2025 09:24:36 +0100 Subject: [PATCH 582/867] wifi: mac80211_hwsim: Limit destroy_on_close radio removal to netgroup hwsim radios marked destroy_on_close are removed when the Netlink socket that created them is closed. As the portid is not unique across network namespaces, closing a socket in one namespace may remove radios in another if it has the destroy_on_close flag set. Instead of matching the network namespace, match the netgroup of the radio to limit radio removal to those that have been created by the closing Netlink socket. The netgroup of a radio identifies the network namespace it was created in, and matching on it removes a destroy_on_close radio even if it has been moved to another namespace. 
Fixes: 100cb9ff40e0 ("mac80211_hwsim: Allow managing radios from non-initial namespaces") Signed-off-by: Martin Willi Link: https://patch.msgid.link/20251103082436.30483-1-martin@strongswan.org Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 9f856042a67af..d28bf18d57eca 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -6698,14 +6698,15 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void remove_user_radios(u32 portid) +static void remove_user_radios(u32 portid, int netgroup) { struct mac80211_hwsim_data *entry, *tmp; LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { - if (entry->destroy_on_close && entry->portid == portid) { + if (entry->destroy_on_close && entry->portid == portid && + entry->netgroup == netgroup) { list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); @@ -6730,7 +6731,7 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - remove_user_radios(notify->portid); + remove_user_radios(notify->portid, hwsim_net_get_netgroup(notify->net)); if (notify->portid == hwsim_net_get_wmediumd(notify->net)) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" From b1d16f7c0063b7209fd3251ce40c77d37b477b83 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 4 Nov 2025 09:23:31 -0800 Subject: [PATCH 583/867] libie: depend on DEBUG_FS when building LIBIE_FWLOG LIBIE_FWLOG is unusable without DEBUG_FS. Mark it in Kconfig. Fix build error on ixgbe when DEBUG_FS is not set. 
To not add another layer of #if IS_ENABLED(LIBIE_FWLOG) in ixgbe fwlog code define debugfs dentry even when DEBUG_FS isn't enabled. In this case the dummy functions of LIBIE_FWLOG will be used, so not initialized dentry isn't a problem. Fixes: 641585bc978e ("ixgbe: fwlog support for e610") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/lkml/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/ Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20251104172333.752445-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/Kconfig | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 -- include/linux/net/intel/libie/fwlog.h | 12 ++++++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index a563a94e27802..122ee23497e6a 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -146,7 +146,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select MDIO select NET_DEVLINK select PLDMFW @@ -298,7 +298,7 @@ config ICE select DIMLIB select LIBIE select LIBIE_ADMINQ - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select NET_DEVLINK select PACKING select PLDMFW diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 14d2752701238..dce4936708eb4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -821,9 +821,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ -#ifdef CONFIG_DEBUG_FS struct dentry *ixgbe_dbg_adapter; -#endif /*CONFIG_DEBUG_FS*/ u8 default_up; /* 
Bitmask indicating in use pools */ diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h index 36b13fabca9ec..7273c78c826b4 100644 --- a/include/linux/net/intel/libie/fwlog.h +++ b/include/linux/net/intel/libie/fwlog.h @@ -78,8 +78,20 @@ struct libie_fwlog { ); }; +#if IS_ENABLED(CONFIG_LIBIE_FWLOG) int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); void libie_fwlog_deinit(struct libie_fwlog *fwlog); void libie_fwlog_reregister(struct libie_fwlog *fwlog); void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#else +static inline int libie_fwlog_init(struct libie_fwlog *fwlog, + struct libie_fwlog_api *api) +{ + return -EOPNOTSUPP; +} +static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { } +static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { } +static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, + u16 len) { } +#endif /* CONFIG_LIBIE_FWLOG */ #endif /* _LIBIE_FWLOG_H_ */ From dfb073d32cac28cdb597d16d1e0589b25c841661 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Tue, 4 Nov 2025 14:59:15 -0800 Subject: [PATCH 584/867] ptp: Return -EINVAL on ptp_clock_register if required ops are NULL ptp_clock should never be registered unless it stubs one of gettimex64() or gettime64() and settime64(). WARN_ON_ONCE and error out if either set of function pointers is null. For consistency, n_alarm validation is also folded into the WARN_ON_ONCE. 
Suggested-by: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Reviewed-by: Harshitha Ramamurthy Reviewed-by: Vadim Fedorenko Signed-off-by: Tim Hostetler Acked-by: Richard Cochran Link: https://patch.msgid.link/20251104225915.2040080-1-thostet@google.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index ef020599b7711..b0e167c0b3ebd 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -322,7 +322,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, char debugfsname[16]; size_t size; - if (info->n_alarm > PTP_MAX_ALARMS) + if (WARN_ON_ONCE(info->n_alarm > PTP_MAX_ALARMS || + (!info->gettimex64 && !info->gettime64) || + !info->settime64)) return ERR_PTR(-EINVAL); /* Initialize a clock structure. */ From d917c217b612971ea05ae1582e8740b747e0e7e8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 4 Nov 2025 16:34:35 +0100 Subject: [PATCH 585/867] net: gro_cells: Reduce lock scope in gro_cell_poll One GRO-cell device's NAPI callback can nest into the GRO-cell of another device if the underlying device is also using GRO-cell. This is the case for IPsec over vxlan. These two GRO-cells are separate devices. From lockdep's point of view it is the same because each device is sharing the same lock class and so it reports a possible deadlock assuming one device is nesting into itself. Hold the bh_lock only while accessing gro_cell::napi_skbs in gro_cell_poll(). This reduces the locking scope and avoids acquiring the same lock class multiple times. 
Fixes: 25718fdcbdd2 ("net: gro_cells: Use nested-BH locking for gro_cell") Reported-by: Gal Pressman Closes: https://lore.kernel.org/all/66664116-edb8-48dc-ad72-d5223696dd19@nvidia.com/ Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251104153435.ty88xDQt@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index fd57b845de333..a725d21159a6f 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -60,9 +60,10 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; - __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { + __local_lock_nested_bh(&cell->bh_lock); skb = __skb_dequeue(&cell->napi_skbs); + __local_unlock_nested_bh(&cell->bh_lock); if (!skb) break; napi_gro_receive(napi, skb); @@ -71,7 +72,6 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); - __local_unlock_nested_bh(&cell->bh_lock); return work_done; } From d1c94bc5b90c21b65469d30d4a6bc8ed715c1bfe Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 4 Nov 2025 16:15:36 +0200 Subject: [PATCH 586/867] net/mlx5e: Fix return value in case of module EEPROM read error mlx5e_get_module_eeprom_by_page() has weird error handling. First, it is treating -EINVAL as a special case, but it is unclear why. Second, it tries to fail "gracefully" by returning the number of bytes read even in case of an error. This results in wrongly returning success (0 return value) if the error occurs before any bytes were read. Simplify the error handling by returning an error when such occurs. This also aligns with the error handling we have in mlx5e_get_module_eeprom() for the old API. 
This fixes the following case where the query fails, but userspace ethtool wrongly treats it as success and dumps an output: # ethtool -m eth2 netlink warning: mlx5_core: Query module eeprom by page failed, read 0 bytes, err -5 netlink warning: mlx5_core: Query module eeprom by page failed, read 0 bytes, err -5 Offset Values ------ ------ 0x0000: 00 00 00 00 05 00 04 00 00 00 00 00 05 00 05 00 0x0010: 00 00 00 00 05 00 06 00 50 00 00 00 67 65 20 66 0x0020: 61 69 6c 65 64 2c 20 72 65 61 64 20 30 20 62 79 0x0030: 74 65 73 2c 20 65 72 72 20 2d 35 00 14 00 03 00 0x0040: 08 00 01 00 03 00 00 00 08 00 02 00 1a 00 00 00 0x0050: 14 00 04 00 08 00 01 00 04 00 00 00 08 00 02 00 0x0060: 0e 00 00 00 14 00 05 00 08 00 01 00 05 00 00 00 0x0070: 08 00 02 00 1a 00 00 00 14 00 06 00 08 00 01 00 Fixes: e109d2b204da ("net/mlx5: Implement get_module_eeprom_by_page()") Signed-off-by: Gal Pressman Reviewed-by: Alex Lazar Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762265736-1028868-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 53e5ae252eac5..893e1380a7c97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2125,14 +2125,12 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, if (!size_read) return i; - if (size_read == -EINVAL) - return -EINVAL; if (size_read < 0) { NL_SET_ERR_MSG_FMT_MOD( extack, "Query module eeprom by page failed, read %u bytes, err %d", i, size_read); - return i; + return size_read; } i += size_read; From ae4789affd1e181ae46e72e2b5fbe2d6d7b6616a Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 4 Nov 2025 16:14:15 +0530 Subject: [PATCH 587/867] net: ti: icssg-prueth: 
Fix fdb hash size configuration The ICSSG driver does the initial FDB configuration which includes setting the control registers. Other run time management like learning is managed by the PRUs. The default FDB hash size used by the firmware is 512 slots, which the driver currently does not configure. Update the driver FDB config to include FDB hash size as well. Please refer to section 6.4.14.12.17 of the TRM [1] for how the FDB config register gets configured. According to table 6-1404, the reset value of the FDB_HAS_SIZE field is 4, meaning 1024 slots. Currently the driver is not updating this reset value from 4 (1024 slots) to 3 (512 slots). This patch fixes this by updating the reset value to 512 slots. [1]: https://www.ti.com/lit/pdf/spruim2 Fixes: abd5576b9c57f ("net: ti: icssg-prueth: Add support for ICSSG switch firmware") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251104104415.3110537-1-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_config.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index da53eb04b0a43..3f8237c17d099 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -66,6 +66,9 @@ #define FDB_GEN_CFG1 0x60 #define SMEM_VLAN_OFFSET 8 #define SMEM_VLAN_OFFSET_MASK GENMASK(25, 8) +#define FDB_HASH_SIZE_MASK GENMASK(6, 3) +#define FDB_HASH_SIZE_SHIFT 3 +#define FDB_HASH_SIZE 3 #define FDB_GEN_CFG2 0x64 #define FDB_VLAN_EN BIT(6) @@ -463,6 +466,8 @@ void icssg_init_emac_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ 
regmap_write(prueth->miig_rt, FDB_GEN_CFG2, (FDB_PRU0_EN | FDB_PRU1_EN | FDB_HOST_EN)); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + @@ -484,6 +489,8 @@ void icssg_init_fw_offload_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, FDB_EN_ALL); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + From 665a7e13c220bbde55531a24bd5524320648df10 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:33 +0200 Subject: [PATCH 588/867] net/mlx5e: SHAMPO, Fix header mapping for 64K pages HW-GRO is broken on mlx5 for 64K page sizes. The patch in the fixes tag didn't take into account larger page sizes when doing an align down of max_ksm_entries. For 64K page size, max_ksm_entries is 0 which will skip mapping header pages via WQE UMR. This breaks header-data split and will result in the following syndrome: mlx5_core 0000:00:08.0 eth2: Error cqe on cqn 0x4c9, ci 0x0, qn 0x1133, opcode 0xe, syndrome 0x4, vendor syndrome 0x32 00000000: 00 00 00 00 04 4a 00 00 00 00 00 00 20 00 93 32 00000010: 55 00 00 00 fb cc 00 00 00 00 00 00 07 18 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4a 00000030: 00 00 3b c7 93 01 32 04 00 00 00 00 00 00 bf e0 mlx5_core 0000:00:08.0 eth2: ERR CQE on RQ: 0x1133 Furthermore, the function that fills in WQE UMRs for the headers (mlx5e_build_shampo_hd_umr()) only supports mapping page sizes that fit in a single UMR WQE. This patch goes back to the old non-aligned max_ksm_entries value and it changes mlx5e_build_shampo_hd_umr() to support mapping a large page over multiple UMR WQEs. 
This means that mlx5e_build_shampo_hd_umr() can now leave a page only partially mapped. The caller, mlx5e_alloc_rx_hd_mpwqe(), ensures that there are enough UMR WQEs to cover complete pages by working on ksm_entries that are multiples of MLX5E_SHAMPO_WQ_HEADER_PER_PAGE. Fixes: 8a0ee54027b1 ("net/mlx5e: SHAMPO, Simplify UMR allocation for headers") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 26621a2972ec2..0c031954ca30a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -671,7 +671,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 pi, header_offset, err, wqe_bbs; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct mlx5e_umr_wqe *umr_wqe; - int headroom, i = 0; + int headroom, i; headroom = rq->buff.headroom; wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries); @@ -679,25 +679,24 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries); - WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)); - while (i < ksm_entries) { - struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + for (i = 0; i < ksm_entries; i++, index++) { + struct mlx5e_frag_page *frag_page; u64 addr; - err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); - if (unlikely(err)) - goto err_unmap; + frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + header_offset = mlx5e_shampo_hd_offset(index); + if (!header_offset) { + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, + 
frag_page); + if (err) + goto err_unmap; + } addr = page_pool_get_dma_addr_netmem(frag_page->netmem); - - for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { - header_offset = mlx5e_shampo_hd_offset(index++); - - umr_wqe->inline_ksms[i++] = (struct mlx5_ksm) { - .key = cpu_to_be32(lkey), - .va = cpu_to_be64(addr + header_offset + headroom), - }; - } + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(lkey), + .va = cpu_to_be64(addr + header_offset + headroom), + }; } sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { @@ -713,7 +712,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, return 0; err_unmap: - while (--i) { + while (--i >= 0) { --index; header_offset = mlx5e_shampo_hd_offset(index); if (!header_offset) { @@ -735,8 +734,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_ksm_entries, len; - max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev), - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev); ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); From bacd8d80181ebe34b599a39aa26bf73a44c91e55 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:34 +0200 Subject: [PATCH 589/867] net/mlx5e: SHAMPO, Fix skb size check for 64K pages mlx5e_hw_gro_skb_has_enough_space() uses a formula to check if there is enough space in the skb frags to store more data. This formula is incorrect for 64K page sizes and it triggers early GRO session termination because the first fragment will blow up beyond GRO_LEGACY_MAX_SIZE. This patch adds a special case for page sizes >= GRO_LEGACY_MAX_SIZE (64K) which uses the skb->len instead. Within this context, the check is safe from fragment overflow because the hardware will continuously fill the data up to the reservation size of 64K and the driver will coalesce all data from the same page to the same fragment. 
This means that the data will span one fragment or at most two for such a large page size. It is expected that the if statement will be optimized out as the check is done with constants. Fixes: 92552d3abd32 ("net/mlx5e: HW_GRO cqe handler implementation") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0c031954ca30a..f2a06752ce375 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2354,7 +2354,10 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) { int nr_frags = skb_shinfo(skb)->nr_frags; - return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; + if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE) + return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE; + else + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; } static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) From d8a7ed9586c7579a99e9e2d90988c9eceeee61ff Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:35 +0200 Subject: [PATCH 590/867] net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and 64K pages The MLX5E_SHAMPO_WQ_HEADER_PER_PAGE and MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE macros are used directly in several places under the assumption that there will always be more headers per WQE than headers per page. However, this assumption doesn't hold for 64K page sizes and higher MTUs (> 4K). This can be first observed during header page allocation: ksm_entries will become 0 during alignment to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE. 
This patch introduces two additional members to the mlx5e_shampo_hd struct which are meant to be used instead of the macros mentioned above. When the number of headers per WQE goes below MLX5E_SHAMPO_WQ_HEADER_PER_PAGE, clamp the number of headers per page and expand the header size accordingly so that the headers for one WQE cover a full page. All the formulas are adapted to use these two new members. Fixes: 945ca432bfd0 ("net/mlx5e: SHAMPO, Drop info array") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 +++++++++++-------- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 14e3207b14e74..a163f81f07c13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -634,7 +634,10 @@ struct mlx5e_dma_info { struct mlx5e_shampo_hd { struct mlx5e_frag_page *pages; u32 hd_per_wq; + u32 hd_per_page; u16 hd_per_wqe; + u8 log_hd_per_page; + u8 log_hd_entry_size; unsigned long *bitmap; u16 pi; u16 ci; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9c46511e7b437..6023bbbf3f39c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -791,8 +791,9 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, int node) { void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u8 log_hd_per_page, log_hd_entry_size; + u16 hd_per_wq, hd_per_wqe; u32 hd_pool_size; - u16 hd_per_wq; int wq_size; int err; @@ -815,11 +816,24 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev 
*mdev, if (err) goto err_umr_mkey; - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rqp); wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); - hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + if (hd_per_wqe >= MLX5E_SHAMPO_WQ_HEADER_PER_PAGE) { + log_hd_per_page = MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE; + log_hd_entry_size = MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + } else { + log_hd_per_page = order_base_2(hd_per_wqe); + log_hd_entry_size = order_base_2(PAGE_SIZE / hd_per_wqe); + } + + rq->mpwqe.shampo->hd_per_wqe = hd_per_wqe; + rq->mpwqe.shampo->hd_per_page = BIT(log_hd_per_page); + rq->mpwqe.shampo->log_hd_per_page = log_hd_per_page; + rq->mpwqe.shampo->log_hd_entry_size = log_hd_entry_size; + + hd_pool_size = (hd_per_wqe * wq_size) >> log_hd_per_page; if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { /* Separate page pool for shampo headers */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f2a06752ce375..687cf123211d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -648,17 +648,20 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, umr_wqe->hdr.uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index) +static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, + int header_index) { - BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - return &rq->mpwqe.shampo->pages[header_index >> MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE]; + return &shampo->pages[header_index >> shampo->log_hd_per_page]; } -static u64 
mlx5e_shampo_hd_offset(int header_index) +static u64 mlx5e_shampo_hd_offset(struct mlx5e_rq *rq, int header_index) { - return (header_index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u32 hd_per_page = shampo->hd_per_page; + + return (header_index & (hd_per_page - 1)) << shampo->log_hd_entry_size; } static void mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index); @@ -684,7 +687,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u64 addr; frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); @@ -714,7 +717,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, err_unmap: while (--i >= 0) { --index; - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); @@ -738,7 +741,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); - ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + ksm_entries = ALIGN_DOWN(ksm_entries, shampo->hd_per_page); if (!ksm_entries) return 0; @@ -856,7 +859,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + if (((header_index + 1) & (shampo->hd_per_page - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); @@ -1223,9 +1226,10 @@ static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, static void 
*mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + void *addr = netmem_address(frag_page->netmem); - return netmem_address(frag_page->netmem) + head_offset; + return addr + head_offset + rq->buff.headroom; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -2265,7 +2269,8 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index); + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; @@ -2281,7 +2286,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); - if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + if (likely(frag_size <= BIT(shampo->log_hd_entry_size))) { /* build SKB around header */ dma_sync_single_range_for_cpu(rq->pdev, dma_addr, 0, frag_size, rq->buff.map_dir); net_prefetchw(hdr); From a04ea57aae375bdda1cb57034d8bcbb351e1f973 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 4 Nov 2025 14:23:21 +0800 Subject: [PATCH 591/867] net: libwx: fix device bus LAN ID The device bus LAN ID was obtained from PCI_FUNC(), but when a PF port is passthrough to a virtual machine, the function number may not match the actual port index on the device. This could cause the driver to perform operations such as LAN reset on the wrong port. Fix this by reading the LAN ID from port status register. 
Fixes: a34b3e6ed8fb ("net: txgbe: Store PCI info") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/B60A670C1F52CB8E+20251104062321.40059-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 3 ++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 1e2713f0c9212..b37d6cfbfbe94 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2427,7 +2427,8 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = PCI_FUNC(pdev->devfn); + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index d89b9b8a0a2ce..2f8319e031820 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -97,6 +97,8 @@ #define WX_CFG_PORT_CTL_DRV_LOAD BIT(3) #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ +#define WX_CFG_PORT_ST 0x14404 +#define WX_CFG_PORT_ST_LANID GENMASK(9, 8) #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) #define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ @@ -557,8 +559,6 @@ enum WX_MSCA_CMD_value { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -#define WX_CFG_PORT_ST 0x14404 - /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ From 
c79a022524577e486220bc9627ccebc706148c1f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Oct 2025 16:05:01 +0300 Subject: [PATCH 592/867] net: dsa: microchip: Fix a link check in ksz9477_pcs_read() The BMSR_LSTATUS define is 0x4 but the "p->phydev.link" variable is a 1 bit bitfield in a u32. Since 4 doesn't fit in 0-1 range it means that ".link" is always set to false. Add a !! to fix this. [Jakub: According to Maxime the phydev struct isn't really used and we should consider removing it completely. So not treating this as a fix.] Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQSz_euUg0Ja8ZaH@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index d747ea1c41a79..cf67d63777194 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -244,7 +244,7 @@ static int ksz9477_pcs_read(struct mii_bus *bus, int phy, int mmd, int reg) p->phydev.link = 0; } } else if (reg == MII_BMSR) { - p->phydev.link = (val & BMSR_LSTATUS); + p->phydev.link = !!(val & BMSR_LSTATUS); } } From 4d6ec3a7932ca5b168426f7b5b40abab2b41d2da Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Wed, 5 Nov 2025 11:47:16 +0800 Subject: [PATCH 593/867] net: wan: framer: pef2256: Switch to devm_mfd_add_devices() The driver calls mfd_add_devices() but fails to call mfd_remove_devices() in error paths after successful MFD device registration and in the remove function. This leads to resource leaks where MFD child devices are not properly unregistered. Replace mfd_add_devices with devm_mfd_add_devices to automatically manage the device resources. 
Fixes: c96e976d9a05 ("net: wan: framer: Add support for the Lantiq PEF2256 framer") Suggested-by: Herve Codina Signed-off-by: Haotian Zhang Acked-by: Herve Codina Link: https://patch.msgid.link/20251105034716.662-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/wan/framer/pef2256/pef2256.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/framer/pef2256/pef2256.c b/drivers/net/wan/framer/pef2256/pef2256.c index c5501826db1eb..c058cc79137dd 100644 --- a/drivers/net/wan/framer/pef2256/pef2256.c +++ b/drivers/net/wan/framer/pef2256/pef2256.c @@ -648,7 +648,8 @@ static int pef2256_add_audio_devices(struct pef2256 *pef2256) audio_devs[i].id = i; } - ret = mfd_add_devices(pef2256->dev, 0, audio_devs, count, NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, audio_devs, count, + NULL, 0, NULL); kfree(audio_devs); return ret; } @@ -822,8 +823,8 @@ static int pef2256_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pef2256); - ret = mfd_add_devices(pef2256->dev, 0, pef2256_devs, - ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, pef2256_devs, + ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); if (ret) { dev_err(pef2256->dev, "add devices failed (%d)\n", ret); return ret; From 93d46ea3e984323fae0e5d2919cf5817e1297d41 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Sat, 1 Nov 2025 01:27:07 +0800 Subject: [PATCH 594/867] net: stmmac: socfpga: Agilex5 EMAC platform configuration Agilex5 HPS EMAC uses the dwxgmac-3.10a IP, unlike previous socfpga platforms which use dwmac1000 IP. Due to differences in platform configuration, Agilex5 requires a distinct setup. Introduce a setup_plat_dat() callback in socfpga_dwmac_ops to handle platform-specific setup. This callback is invoked before stmmac_dvr_probe() to ensure the platform data is correctly configured. Also, implemented separate setup_plat_dat() callback for current socfpga platforms and Agilex5. 
Signed-off-by: Rohan G Thomas Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/20251101-agilex5_ext-v2-1-a6b51b4dca4d@altera.com Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 2ff5db6d41ca0..5666b01723643 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -44,6 +44,7 @@ struct socfpga_dwmac; struct socfpga_dwmac_ops { int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); + void (*setup_plat_dat)(struct socfpga_dwmac *dwmac_priv); }; struct socfpga_dwmac { @@ -441,6 +442,23 @@ static int socfpga_dwmac_init(struct platform_device *pdev, void *bsp_priv) return dwmac->ops->set_phy_mode(dwmac); } +static void socfpga_gen5_setup_plat_dat(struct socfpga_dwmac *dwmac) +{ + struct plat_stmmacenet_data *plat_dat = dwmac->plat_dat; + + plat_dat->core_type = DWMAC_CORE_GMAC; + + /* Rx watchdog timer in dwmac is buggy in this hw */ + plat_dat->riwt_off = 1; +} + +static void socfpga_agilex5_setup_plat_dat(struct socfpga_dwmac *dwmac) +{ + struct plat_stmmacenet_data *plat_dat = dwmac->plat_dat; + + plat_dat->core_type = DWMAC_CORE_XGMAC; +} + static int socfpga_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -497,25 +515,31 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->pcs_init = socfpga_dwmac_pcs_init; plat_dat->pcs_exit = socfpga_dwmac_pcs_exit; plat_dat->select_pcs = socfpga_dwmac_select_pcs; - plat_dat->core_type = DWMAC_CORE_GMAC; - plat_dat->riwt_off = 1; + ops->setup_plat_dat(dwmac); return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct socfpga_dwmac_ops socfpga_gen5_ops = { .set_phy_mode = socfpga_gen5_set_phy_mode, + .setup_plat_dat = 
socfpga_gen5_setup_plat_dat, }; static const struct socfpga_dwmac_ops socfpga_gen10_ops = { .set_phy_mode = socfpga_gen10_set_phy_mode, + .setup_plat_dat = socfpga_gen5_setup_plat_dat, +}; + +static const struct socfpga_dwmac_ops socfpga_agilex5_ops = { + .set_phy_mode = socfpga_gen10_set_phy_mode, + .setup_plat_dat = socfpga_agilex5_setup_plat_dat, }; static const struct of_device_id socfpga_dwmac_match[] = { { .compatible = "altr,socfpga-stmmac", .data = &socfpga_gen5_ops }, { .compatible = "altr,socfpga-stmmac-a10-s10", .data = &socfpga_gen10_ops }, - { .compatible = "altr,socfpga-stmmac-agilex5", .data = &socfpga_gen10_ops }, + { .compatible = "altr,socfpga-stmmac-agilex5", .data = &socfpga_agilex5_ops }, { } }; MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); From 4c00476d44804db3c16838299b87a11741cd0dbd Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Sat, 1 Nov 2025 01:27:08 +0800 Subject: [PATCH 595/867] net: stmmac: socfpga: Enable TBS support for Agilex5 Agilex5 supports Time-Based Scheduling(TBS) for Tx queue 6 and Tx queue 7. This commit enables TBS support for these queues. 
Signed-off-by: Rohan G Thomas Link: https://patch.msgid.link/20251101-agilex5_ext-v2-2-a6b51b4dca4d@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 5666b01723643..4f256f0ae05c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -457,6 +457,19 @@ static void socfpga_agilex5_setup_plat_dat(struct socfpga_dwmac *dwmac) struct plat_stmmacenet_data *plat_dat = dwmac->plat_dat; plat_dat->core_type = DWMAC_CORE_XGMAC; + + /* Enable TBS */ + switch (plat_dat->tx_queues_to_use) { + case 8: + plat_dat->tx_queues_cfg[7].tbs_en = true; + fallthrough; + case 7: + plat_dat->tx_queues_cfg[6].tbs_en = true; + break; + default: + /* Tx Queues 0 - 5 doesn't support TBS on Agilex5 */ + break; + } } static int socfpga_dwmac_probe(struct platform_device *pdev) From e28988aef70f8f993d93a62161a202e930cfce55 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Sat, 1 Nov 2025 01:27:09 +0800 Subject: [PATCH 596/867] net: stmmac: socfpga: Enable TSO for Agilex5 platform Agilex5 supports TCP Segmentation Offload(TSO). This commit enables TSO for Agilex5 socfpga platforms. 
Signed-off-by: Rohan G Thomas Link: https://patch.msgid.link/20251101-agilex5_ext-v2-3-a6b51b4dca4d@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 4f256f0ae05c1..1837346ca2d43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -458,6 +458,9 @@ static void socfpga_agilex5_setup_plat_dat(struct socfpga_dwmac *dwmac) plat_dat->core_type = DWMAC_CORE_XGMAC; + /* Enable TSO */ + plat_dat->flags |= STMMAC_FLAG_TSO_EN; + /* Enable TBS */ switch (plat_dat->tx_queues_to_use) { case 8: From fd8c4f6454963aa7ea895657472aa57f33779d57 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Sat, 1 Nov 2025 01:27:10 +0800 Subject: [PATCH 597/867] net: stmmac: socfpga: Add hardware supported cross-timestamp Cross timestamping is supported on the Agilex5 platform with the Synchronized Multidrop Timestamp Gathering (SMTG) IP. The hardware cross-timestamp result is made available to applications through the ioctl call PTP_SYS_OFFSET_PRECISE, which in turn calls stmmac_getcrosststamp(). Device time is stored in the MAC Auxiliary register. The 64-bit System time (ARM_ARCH_COUNTER) is stored in SMTG IP. SMTG IP is an MDIO device whose MDIO registers 0xC - 0xF hold the 64-bit system time. 
This commit is similar to following commit for Intel platforms: Commit 341f67e424e5 ("net: stmmac: Add hardware supported cross-timestamp") Signed-off-by: Rohan G Thomas Link: https://patch.msgid.link/20251101-agilex5_ext-v2-4-a6b51b4dca4d@altera.com Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 120 ++++++++++++++++++ .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 5 + 2 files changed, 125 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 1837346ca2d43..49d651948e2bd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -15,8 +16,10 @@ #include #include +#include "dwxgmac2.h" #include "stmmac.h" #include "stmmac_platform.h" +#include "stmmac_ptp.h" #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 @@ -41,6 +44,13 @@ #define SGMII_ADAPTER_ENABLE 0x0000 #define SGMII_ADAPTER_DISABLE 0x0001 +#define SMTG_MDIO_ADDR 0x15 +#define SMTG_TSC_WORD0 0xC +#define SMTG_TSC_WORD1 0xD +#define SMTG_TSC_WORD2 0xE +#define SMTG_TSC_WORD3 0xF +#define SMTG_TSC_SHIFT 16 + struct socfpga_dwmac; struct socfpga_dwmac_ops { int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); @@ -269,6 +279,112 @@ static int socfpga_set_phy_mode_common(int phymode, u32 *val) return 0; } +static void get_smtgtime(struct mii_bus *mii, int smtg_addr, u64 *smtg_time) +{ + u64 ns; + + ns = mdiobus_read(mii, smtg_addr, SMTG_TSC_WORD3); + ns <<= SMTG_TSC_SHIFT; + ns |= mdiobus_read(mii, smtg_addr, SMTG_TSC_WORD2); + ns <<= SMTG_TSC_SHIFT; + ns |= mdiobus_read(mii, smtg_addr, SMTG_TSC_WORD1); + ns <<= SMTG_TSC_SHIFT; + ns |= mdiobus_read(mii, smtg_addr, SMTG_TSC_WORD0); + + *smtg_time = ns; +} + +static int smtg_crosststamp(ktime_t *device, struct system_counterval_t *system, + void *ctx) +{ + struct 
stmmac_priv *priv = (struct stmmac_priv *)ctx; + u32 num_snapshot, gpio_value, acr_value; + void __iomem *ptpaddr = priv->ptpaddr; + void __iomem *ioaddr = priv->hw->pcsr; + unsigned long flags; + u64 smtg_time = 0; + u64 ptp_time = 0; + int i, ret; + u32 v; + + /* Both internal crosstimestamping and external triggered event + * timestamping cannot be run concurrently. + */ + if (priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN) + return -EBUSY; + + mutex_lock(&priv->aux_ts_lock); + /* Enable Internal snapshot trigger */ + acr_value = readl(ptpaddr + PTP_ACR); + acr_value &= ~PTP_ACR_MASK; + switch (priv->plat->int_snapshot_num) { + case AUX_SNAPSHOT0: + acr_value |= PTP_ACR_ATSEN0; + break; + case AUX_SNAPSHOT1: + acr_value |= PTP_ACR_ATSEN1; + break; + case AUX_SNAPSHOT2: + acr_value |= PTP_ACR_ATSEN2; + break; + case AUX_SNAPSHOT3: + acr_value |= PTP_ACR_ATSEN3; + break; + default: + mutex_unlock(&priv->aux_ts_lock); + return -EINVAL; + } + writel(acr_value, ptpaddr + PTP_ACR); + + /* Clear FIFO */ + acr_value = readl(ptpaddr + PTP_ACR); + acr_value |= PTP_ACR_ATSFC; + writel(acr_value, ptpaddr + PTP_ACR); + /* Release the mutex */ + mutex_unlock(&priv->aux_ts_lock); + + /* Trigger Internal snapshot signal. Create a rising edge by just toggle + * the GPO0 to low and back to high. 
+ */ + gpio_value = readl(ioaddr + XGMAC_GPIO_STATUS); + gpio_value &= ~XGMAC_GPIO_GPO0; + writel(gpio_value, ioaddr + XGMAC_GPIO_STATUS); + gpio_value |= XGMAC_GPIO_GPO0; + writel(gpio_value, ioaddr + XGMAC_GPIO_STATUS); + + /* Poll for time sync operation done */ + ret = readl_poll_timeout(priv->ioaddr + XGMAC_INT_STATUS, v, + (v & XGMAC_INT_TSIS), 100, 10000); + if (ret) { + netdev_err(priv->dev, "%s: Wait for time sync operation timeout\n", + __func__); + return ret; + } + + *system = (struct system_counterval_t) { + .cycles = 0, + .cs_id = CSID_ARM_ARCH_COUNTER, + .use_nsecs = false, + }; + + num_snapshot = (readl(ioaddr + XGMAC_TIMESTAMP_STATUS) & + XGMAC_TIMESTAMP_ATSNS_MASK) >> + XGMAC_TIMESTAMP_ATSNS_SHIFT; + + /* Repeat until the timestamps are from the FIFO last segment */ + for (i = 0; i < num_snapshot; i++) { + read_lock_irqsave(&priv->ptp_lock, flags); + stmmac_get_ptptime(priv, ptpaddr, &ptp_time); + *device = ns_to_ktime(ptp_time); + read_unlock_irqrestore(&priv->ptp_lock, flags); + } + + get_smtgtime(priv->mii, SMTG_MDIO_ADDR, &smtg_time); + system->cycles = smtg_time; + + return 0; +} + static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) { struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; @@ -473,6 +589,10 @@ static void socfpga_agilex5_setup_plat_dat(struct socfpga_dwmac *dwmac) /* Tx Queues 0 - 5 doesn't support TBS on Agilex5 */ break; } + + /* Hw supported cross-timestamp */ + plat_dat->int_snapshot_num = AUX_SNAPSHOT0; + plat_dat->crosststamp = smtg_crosststamp; } static int socfpga_dwmac_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 0d408ee17f337..e48cfa05000c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -79,6 +79,7 @@ #define XGMAC_PSRQ(x) GENMASK((x) * 8 + 7, (x) * 8) #define XGMAC_PSRQ_SHIFT(x) ((x) * 8) #define XGMAC_INT_STATUS 0x000000b0 
+#define XGMAC_INT_TSIS BIT(12) #define XGMAC_LPIIS BIT(5) #define XGMAC_PMTIS BIT(4) #define XGMAC_INT_EN 0x000000b4 @@ -173,6 +174,8 @@ #define XGMAC_MDIO_ADDR 0x00000200 #define XGMAC_MDIO_DATA 0x00000204 #define XGMAC_MDIO_C22P 0x00000220 +#define XGMAC_GPIO_STATUS 0x0000027c +#define XGMAC_GPIO_GPO0 BIT(16) #define XGMAC_ADDRx_HIGH(x) (0x00000300 + (x) * 0x8) #define XGMAC_ADDR_MAX 32 #define XGMAC_AE BIT(31) @@ -220,6 +223,8 @@ #define XGMAC_OB BIT(0) #define XGMAC_RSS_DATA 0x00000c8c #define XGMAC_TIMESTAMP_STATUS 0x00000d20 +#define XGMAC_TIMESTAMP_ATSNS_MASK GENMASK(29, 25) +#define XGMAC_TIMESTAMP_ATSNS_SHIFT 25 #define XGMAC_TXTSC BIT(15) #define XGMAC_TXTIMESTAMP_NSEC 0x00000d30 #define XGMAC_TXTSSTSLO GENMASK(30, 0) From 0567c84d683d1f38dc41928eec786ec5c02bf7b4 Mon Sep 17 00:00:00 2001 From: Shangjuan Wei Date: Tue, 4 Nov 2025 15:33:05 +0800 Subject: [PATCH 598/867] dt-bindings: ethernet: eswin: fix yaml schema issues eswin,hsp-sp-csr attribute is one phandle with multiple arguments, so the syntax should be in the form of: items: - items: - description: ... - description: ... - description: ... - description: ... To align with the description of the 'eswin-sp-csr' attribute in the mmc,usb modules, the description of the 'eswin,hsp-sp-csr' attribute has been modified. 
Fixes: 888bd0eca93c ("dt-bindings: ethernet: eswin: Document for EIC7700 SoC") Reported-by: Rob Herring (Arm) Closes: https://lore.kernel.org/all/176096011380.22917.1988679321096076522.robh@kernel.org/ Signed-off-by: Shangjuan Wei Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20251104073305.299-1-weishangjuan@eswincomputing.com Signed-off-by: Jakub Kicinski --- .../bindings/net/eswin,eic7700-eth.yaml | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml index 9ddbfe219ae2e..91e8cd1db67b8 100644 --- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml +++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml @@ -69,17 +69,19 @@ properties: enum: [0, 200, 600, 1200, 1600, 1800, 2000, 2200, 2400] eswin,hsp-sp-csr: + description: + HSP CSR is to control and get status of different high-speed peripherals + (such as Ethernet, USB, SATA, etc.) via register, which can tune + board-level's parameters of PHY, etc. $ref: /schemas/types.yaml#/definitions/phandle-array items: - - description: Phandle to HSP(High-Speed Peripheral) device - - description: Offset of phy control register for internal - or external clock selection - - description: Offset of AXI clock controller Low-Power request - register - - description: Offset of register controlling TX/RX clock delay - description: | - High-Speed Peripheral device needed to configure clock selection, - clock low-power mode and clock delay. 
+ - items: + - description: Phandle to HSP(High-Speed Peripheral) device + - description: Offset of phy control register for internal + or external clock selection + - description: Offset of AXI clock controller Low-Power request + register + - description: Offset of register controlling TX/RX clock delay required: - compatible From 6b47af35a6dded074ff583361f6d6668dd7a401d Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 31 Oct 2025 16:48:11 +0530 Subject: [PATCH 599/867] net: selftests: export packet creation helpers for driver use Export the network selftest packet creation infrastructure to allow network drivers to reuse the existing selftest framework instead of duplicating packet creation code. Signed-off-by: Raju Rangoju Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251031111811.775434-1-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- include/net/selftests.h | 45 ++++++++++++++++++++++++++++++++++++++ net/core/selftests.c | 48 ++++++----------------------------------- 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/include/net/selftests.h b/include/net/selftests.h index e65e8d230d33c..c36e07406ad4c 100644 --- a/include/net/selftests.h +++ b/include/net/selftests.h @@ -3,9 +3,48 @@ #define _NET_SELFTESTS #include +#include + +struct net_packet_attrs { + const unsigned char *src; + const unsigned char *dst; + u32 ip_src; + u32 ip_dst; + bool tcp; + u16 sport; + u16 dport; + int timeout; + int size; + int max_size; + u8 id; + u16 queue_mapping; + bool bad_csum; +}; + +struct net_test_priv { + struct net_packet_attrs *packet; + struct packet_type pt; + struct completion comp; + int double_vlan; + int vlan_id; + int ok; +}; + +struct netsfhdr { + __be32 version; + __be64 magic; + u8 id; +} __packed; + +#define NET_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct netsfhdr)) +#define NET_TEST_PKT_MAGIC 0xdeadcafecafedeadULL +#define NET_LB_TIMEOUT msecs_to_jiffies(200) #if 
IS_ENABLED(CONFIG_NET_SELFTESTS) +struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id, + struct net_packet_attrs *attr); void net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf); int net_selftest_get_count(void); @@ -13,6 +52,12 @@ void net_selftest_get_strings(u8 *data); #else +static inline struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id, + struct net_packet_attrs *attr) +{ + return NULL; +} + static inline void net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) { diff --git a/net/core/selftests.c b/net/core/selftests.c index 3d79133a91a61..8b81feb82c4ae 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -14,46 +14,10 @@ #include #include -struct net_packet_attrs { - const unsigned char *src; - const unsigned char *dst; - u32 ip_src; - u32 ip_dst; - bool tcp; - u16 sport; - u16 dport; - int timeout; - int size; - int max_size; - u8 id; - u16 queue_mapping; - bool bad_csum; -}; - -struct net_test_priv { - struct net_packet_attrs *packet; - struct packet_type pt; - struct completion comp; - int double_vlan; - int vlan_id; - int ok; -}; - -struct netsfhdr { - __be32 version; - __be64 magic; - u8 id; -} __packed; - static u8 net_test_next_id; -#define NET_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ - sizeof(struct netsfhdr)) -#define NET_TEST_PKT_MAGIC 0xdeadcafecafedeadULL -#define NET_LB_TIMEOUT msecs_to_jiffies(200) - -static struct sk_buff *net_test_get_skb(struct net_device *ndev, - struct net_packet_attrs *attr) +struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id, + struct net_packet_attrs *attr) { struct sk_buff *skb = NULL; struct udphdr *uhdr = NULL; @@ -142,8 +106,8 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, shdr = skb_put(skb, sizeof(*shdr)); shdr->version = 0; shdr->magic = cpu_to_be64(NET_TEST_PKT_MAGIC); - attr->id = net_test_next_id; - shdr->id = net_test_next_id++; + attr->id = id; + shdr->id = id; if 
(attr->size) { void *payload = skb_put(skb, attr->size); @@ -190,6 +154,7 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, return skb; } +EXPORT_SYMBOL_GPL(net_test_get_skb); static int net_test_loopback_validate(struct sk_buff *skb, struct net_device *ndev, @@ -286,12 +251,13 @@ static int __net_test_loopback(struct net_device *ndev, tpriv->packet = attr; dev_add_pack(&tpriv->pt); - skb = net_test_get_skb(ndev, attr); + skb = net_test_get_skb(ndev, net_test_next_id, attr); if (!skb) { ret = -ENOMEM; goto cleanup; } + net_test_next_id++; ret = dev_direct_xmit(skb, attr->queue_mapping); if (ret < 0) { goto cleanup; From 862a64c83faf7708e7e79498193ff5270543a68d Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 31 Oct 2025 16:45:53 +0530 Subject: [PATCH 600/867] amd-xgbe: introduce support ethtool selftest Add support for ethtool selftest for MAC loopback. This includes the sanity check and helps in finding the misconfiguration of HW. Uses the existing selftest infrastructure to create test packets. 
Signed-off-by: Raju Rangoju Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251031111555.774425-2-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/Kconfig | 1 + drivers/net/ethernet/amd/xgbe/Makefile | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 17 ++ drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 7 + drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 237 ++++++++++++++++++ drivers/net/ethernet/amd/xgbe/xgbe.h | 10 + 6 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/amd/xgbe/xgbe-selftest.c diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index b39c6f3e1edaa..d54dca3074eb6 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -165,6 +165,7 @@ config AMD_XGBE select CRC32 select PHYLIB select AMD_XGBE_HAVE_ECC if X86 + select NET_SELFTESTS help This driver supports the AMD 10GbE Ethernet device found on an AMD SoC. diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile index 980e276522377..5992f7fd4d9b7 100644 --- a/drivers/net/ethernet/amd/xgbe/Makefile +++ b/drivers/net/ethernet/amd/xgbe/Makefile @@ -5,7 +5,7 @@ amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \ xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \ xgbe-hwtstamp.o xgbe-ptp.o xgbe-pps.o \ xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \ - xgbe-platform.o + xgbe-platform.o xgbe-selftest.o amd-xgbe-$(CONFIG_PCI) += xgbe-pci.o amd-xgbe-$(CONFIG_AMD_XGBE_DCB) += xgbe-dcb.o diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index e5391a2eca51d..ffc7d83522c7e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -3578,3 +3578,20 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) DBGPR("<--xgbe_init_function_ptrs\n"); } + +int xgbe_enable_mac_loopback(struct xgbe_prv_data *pdata) +{ + /* Enable MAC loopback mode */ + 
XGMAC_IOWRITE_BITS(pdata, MAC_RCR, LM, 1); + + /* Wait for loopback to stabilize */ + usleep_range(10, 15); + + return 0; +} + +void xgbe_disable_mac_loopback(struct xgbe_prv_data *pdata) +{ + /* Disable MAC loopback mode */ + XGMAC_IOWRITE_BITS(pdata, MAC_RCR, LM, 0); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index b6e1b67a2d0e3..0d19b09497a0f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -85,6 +85,9 @@ static void xgbe_get_strings(struct net_device *netdev, u32 stringset, u8 *data) int i; switch (stringset) { + case ETH_SS_TEST: + xgbe_selftest_get_strings(pdata, data); + break; case ETH_SS_STATS: for (i = 0; i < XGBE_STATS_COUNT; i++) ethtool_puts(&data, xgbe_gstring_stats[i].stat_string); @@ -131,6 +134,9 @@ static int xgbe_get_sset_count(struct net_device *netdev, int stringset) int ret; switch (stringset) { + case ETH_SS_TEST: + ret = xgbe_selftest_get_count(pdata); + break; case ETH_SS_STATS: ret = XGBE_STATS_COUNT + (pdata->tx_ring_count * 2) + @@ -760,6 +766,7 @@ static const struct ethtool_ops xgbe_ethtool_ops = { .set_ringparam = xgbe_set_ringparam, .get_channels = xgbe_get_channels, .set_channels = xgbe_set_channels, + .self_test = xgbe_selftest_run, }; const struct ethtool_ops *xgbe_get_ethtool_ops(void) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c new file mode 100644 index 0000000000000..8a3a6279584d3 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) +/* + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. 
+ * All rights reserved + * + * Author: Raju Rangoju + */ +#include +#include +#include +#include +#include +#include +#include + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_LOOPBACK_NONE 0 +#define XGBE_LOOPBACK_MAC 1 + +struct xgbe_test { + char name[ETH_GSTRING_LEN]; + int lb; + int (*fn)(struct xgbe_prv_data *pdata); +}; + +static u8 xgbe_test_id; + +static int xgbe_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct net_test_priv *tdata = pt->af_packet_priv; + const unsigned char *dst = tdata->packet->dst; + const unsigned char *src = tdata->packet->src; + struct netsfhdr *hdr; + struct ethhdr *eh; + struct tcphdr *th; + struct udphdr *uh; + struct iphdr *ih; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (skb_linearize(skb)) + goto out; + + if (skb_headlen(skb) < (NET_TEST_PKT_SIZE - ETH_HLEN)) + goto out; + + eh = (struct ethhdr *)skb_mac_header(skb); + if (dst) { + if (!ether_addr_equal_unaligned(eh->h_dest, dst)) + goto out; + } + if (src) { + if (!ether_addr_equal_unaligned(eh->h_source, src)) + goto out; + } + + ih = ip_hdr(skb); + + if (tdata->packet->tcp) { + if (ih->protocol != IPPROTO_TCP) + goto out; + + th = (struct tcphdr *)((u8 *)ih + 4 * ih->ihl); + if (th->dest != htons(tdata->packet->dport)) + goto out; + + hdr = (struct netsfhdr *)((u8 *)th + sizeof(*th)); + } else { + if (ih->protocol != IPPROTO_UDP) + goto out; + + uh = (struct udphdr *)((u8 *)ih + 4 * ih->ihl); + if (uh->dest != htons(tdata->packet->dport)) + goto out; + + hdr = (struct netsfhdr *)((u8 *)uh + sizeof(*uh)); + } + + if (hdr->magic != cpu_to_be64(NET_TEST_PKT_MAGIC)) + goto out; + if (tdata->packet->id != hdr->id) + goto out; + + tdata->ok = true; + complete(&tdata->comp); +out: + kfree_skb(skb); + return 0; +} + +static int __xgbe_test_loopback(struct xgbe_prv_data *pdata, + struct net_packet_attrs *attr) +{ + struct net_test_priv *tdata; + struct 
sk_buff *skb = NULL; + int ret = 0; + + tdata = kzalloc(sizeof(*tdata), GFP_KERNEL); + if (!tdata) + return -ENOMEM; + + tdata->ok = false; + init_completion(&tdata->comp); + + tdata->pt.type = htons(ETH_P_IP); + tdata->pt.func = xgbe_test_loopback_validate; + tdata->pt.dev = pdata->netdev; + tdata->pt.af_packet_priv = tdata; + tdata->packet = attr; + + dev_add_pack(&tdata->pt); + + skb = net_test_get_skb(pdata->netdev, xgbe_test_id, attr); + if (!skb) { + ret = -ENOMEM; + goto cleanup; + } + + xgbe_test_id++; + ret = dev_direct_xmit(skb, attr->queue_mapping); + if (ret) + goto cleanup; + + if (!attr->timeout) + attr->timeout = NET_LB_TIMEOUT; + + wait_for_completion_timeout(&tdata->comp, attr->timeout); + ret = tdata->ok ? 0 : -ETIMEDOUT; + + if (ret) + netdev_err(pdata->netdev, "Response timedout: ret %d\n", ret); +cleanup: + dev_remove_pack(&tdata->pt); + kfree(tdata); + return ret; +} + +static int xgbe_test_mac_loopback(struct xgbe_prv_data *pdata) +{ + struct net_packet_attrs attr = {}; + + attr.dst = pdata->netdev->dev_addr; + return __xgbe_test_loopback(pdata, &attr); +} + +static const struct xgbe_test xgbe_selftests[] = { + { + .name = "MAC Loopback ", + .lb = XGBE_LOOPBACK_MAC, + .fn = xgbe_test_mac_loopback, + }, +}; + +void xgbe_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + struct xgbe_prv_data *pdata = netdev_priv(dev); + int count = xgbe_selftest_get_count(pdata); + int i, ret; + + memset(buf, 0, sizeof(*buf) * count); + xgbe_test_id = 0; + + if (etest->flags != ETH_TEST_FL_OFFLINE) { + netdev_err(pdata->netdev, "Only offline tests are supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } else if (!netif_carrier_ok(dev)) { + netdev_err(pdata->netdev, + "Invalid link, cannot execute tests\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } + + /* Wait for queues drain */ + msleep(200); + + for (i = 0; i < count; i++) { + ret = 0; + + switch (xgbe_selftests[i].lb) { + case XGBE_LOOPBACK_MAC: + ret = 
xgbe_enable_mac_loopback(pdata); + break; + case XGBE_LOOPBACK_NONE: + break; + default: + ret = -EOPNOTSUPP; + break; + } + + /* + * First tests will always be MAC / PHY loopback. + * If any of them is not supported we abort earlier. + */ + if (ret) { + netdev_err(pdata->netdev, "Loopback not supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + break; + } + + ret = xgbe_selftests[i].fn(pdata); + if (ret && (ret != -EOPNOTSUPP)) + etest->flags |= ETH_TEST_FL_FAILED; + buf[i] = ret; + + switch (xgbe_selftests[i].lb) { + case XGBE_LOOPBACK_MAC: + xgbe_disable_mac_loopback(pdata); + break; + default: + break; + } + } +} + +void xgbe_selftest_get_strings(struct xgbe_prv_data *pdata, u8 *data) +{ + u8 *p = data; + int i; + + for (i = 0; i < xgbe_selftest_get_count(pdata); i++) + ethtool_puts(&p, xgbe_selftests[i].name); +} + +int xgbe_selftest_get_count(struct xgbe_prv_data *pdata) +{ + return ARRAY_SIZE(xgbe_selftests); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 381f72a33d1af..dc03082c59aa6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1322,6 +1322,16 @@ void xgbe_update_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec, int xgbe_pps_config(struct xgbe_prv_data *pdata, struct xgbe_pps_config *cfg, int index, bool on); +/* Selftest functions */ +void xgbe_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf); +void xgbe_selftest_get_strings(struct xgbe_prv_data *pdata, u8 *data); +int xgbe_selftest_get_count(struct xgbe_prv_data *pdata); + +/* Loopback control */ +int xgbe_enable_mac_loopback(struct xgbe_prv_data *pdata); +void xgbe_disable_mac_loopback(struct xgbe_prv_data *pdata); + #ifdef CONFIG_DEBUG_FS void xgbe_debugfs_init(struct xgbe_prv_data *); void xgbe_debugfs_exit(struct xgbe_prv_data *); From 42b06fcc878d08785a0c44d2af42c8db453487e2 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 31 Oct 2025 16:45:55 +0530 
Subject: [PATCH 601/867] amd-xgbe: add ethtool phy loopback selftest Add support for PHY loopback testing via ethtool self-test. The test uses phy_loopback() which enables PHY-level loopback through the PHY driver's set_loopback callback if provided, else uses the genphy_loopback(). Signed-off-by: Raju Rangoju Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251031111555.774425-3-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c index 8a3a6279584d3..23b9d568a861e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c @@ -19,6 +19,7 @@ #define XGBE_LOOPBACK_NONE 0 #define XGBE_LOOPBACK_MAC 1 +#define XGBE_LOOPBACK_PHY 2 struct xgbe_test { char name[ETH_GSTRING_LEN]; @@ -151,11 +152,36 @@ static int xgbe_test_mac_loopback(struct xgbe_prv_data *pdata) return __xgbe_test_loopback(pdata, &attr); } +static int xgbe_test_phy_loopback(struct xgbe_prv_data *pdata) +{ + struct net_packet_attrs attr = {}; + int ret; + + if (!pdata->netdev->phydev) { + netdev_err(pdata->netdev, "phydev not found: cannot start PHY loopback test\n"); + return -EOPNOTSUPP; + } + + ret = phy_loopback(pdata->netdev->phydev, true, 0); + if (ret) + return ret; + + attr.dst = pdata->netdev->dev_addr; + ret = __xgbe_test_loopback(pdata, &attr); + + phy_loopback(pdata->netdev->phydev, false, 0); + return ret; +} + static const struct xgbe_test xgbe_selftests[] = { { .name = "MAC Loopback ", .lb = XGBE_LOOPBACK_MAC, .fn = xgbe_test_mac_loopback, + }, { + .name = "PHY Loopback ", + .lb = XGBE_LOOPBACK_NONE, + .fn = xgbe_test_phy_loopback, }, }; @@ -187,6 +213,13 @@ void xgbe_selftest_run(struct net_device *dev, ret = 0; switch (xgbe_selftests[i].lb) { + case XGBE_LOOPBACK_PHY: + ret = -EOPNOTSUPP; + if (dev->phydev) + ret = 
phy_loopback(dev->phydev, true, 0); + if (!ret) + break; + fallthrough; case XGBE_LOOPBACK_MAC: ret = xgbe_enable_mac_loopback(pdata); break; @@ -213,6 +246,13 @@ void xgbe_selftest_run(struct net_device *dev, buf[i] = ret; switch (xgbe_selftests[i].lb) { + case XGBE_LOOPBACK_PHY: + ret = -EOPNOTSUPP; + if (dev->phydev) + ret = phy_loopback(dev->phydev, false, 0); + if (!ret) + break; + fallthrough; case XGBE_LOOPBACK_MAC: xgbe_disable_mac_loopback(pdata); break; From d7735c6bb2310f7ca8235af7f946e6c8716cdb5e Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 31 Oct 2025 16:45:57 +0530 Subject: [PATCH 602/867] amd-xgbe: add ethtool split header selftest Adds support for ethtool split header selftest. Performs UDP and TCP checks to ensure split header selftest works for both packet types. Signed-off-by: Raju Rangoju Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20251031111555.774425-4-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 + drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 46 +++++++++++++++++++ drivers/net/ethernet/amd/xgbe/xgbe.h | 1 + 3 files changed, 49 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index ffc7d83522c7e..b646ae575e6a1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -211,6 +211,7 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) } XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); + pdata->sph = true; } static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) @@ -223,6 +224,7 @@ static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); } + pdata->sph = false; } static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c index 
23b9d568a861e..15c51e96bcdf7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c @@ -173,6 +173,48 @@ static int xgbe_test_phy_loopback(struct xgbe_prv_data *pdata) return ret; } +static int xgbe_test_sph(struct xgbe_prv_data *pdata) +{ + struct net_packet_attrs attr = {}; + unsigned long cnt_end, cnt_start; + int ret; + + cnt_start = pdata->ext_stats.rx_split_header_packets; + + if (!pdata->sph) { + netdev_err(pdata->netdev, "Split Header not enabled\n"); + return -EOPNOTSUPP; + } + + /* UDP test */ + attr.dst = pdata->netdev->dev_addr; + attr.tcp = false; + + ret = __xgbe_test_loopback(pdata, &attr); + if (ret) + return ret; + + cnt_end = pdata->ext_stats.rx_split_header_packets; + if (cnt_end <= cnt_start) + return -EINVAL; + + /* TCP test */ + cnt_start = cnt_end; + + attr.dst = pdata->netdev->dev_addr; + attr.tcp = true; + + ret = __xgbe_test_loopback(pdata, &attr); + if (ret) + return ret; + + cnt_end = pdata->ext_stats.rx_split_header_packets; + if (cnt_end <= cnt_start) + return -EINVAL; + + return 0; +} + static const struct xgbe_test xgbe_selftests[] = { { .name = "MAC Loopback ", @@ -182,6 +224,10 @@ static const struct xgbe_test xgbe_selftests[] = { .name = "PHY Loopback ", .lb = XGBE_LOOPBACK_NONE, .fn = xgbe_test_phy_loopback, + }, { + .name = "Split Header ", + .lb = XGBE_LOOPBACK_PHY, + .fn = xgbe_test_sph, }, }; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index dc03082c59aa6..03ef0f5484830 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1246,6 +1246,7 @@ struct xgbe_prv_data { int rx_adapt_retries; bool rx_adapt_done; bool mode_set; + bool sph; }; /* Function prototypes*/ From 9c11b6b1abcd328136fc0cbc381734d6815d1c16 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 31 Oct 2025 16:45:59 +0530 Subject: [PATCH 603/867] amd-xgbe: add ethtool jumbo frame selftest Adds support for jumbo frame 
selftest. Works only for mtu size greater than 1500. Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20251031111555.774425-5-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-selftest.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c index 15c51e96bcdf7..55e5e467facd7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-selftest.c @@ -42,11 +42,19 @@ static int xgbe_test_loopback_validate(struct sk_buff *skb, struct tcphdr *th; struct udphdr *uh; struct iphdr *ih; + int eat; skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) goto out; + eat = (skb->tail + skb->data_len) - skb->end; + if (eat > 0 && skb_shared(skb)) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + } + if (skb_linearize(skb)) goto out; @@ -215,6 +223,17 @@ static int xgbe_test_sph(struct xgbe_prv_data *pdata) return 0; } +static int xgbe_test_jumbo(struct xgbe_prv_data *pdata) +{ + struct net_packet_attrs attr = {}; + int size = pdata->rx_buf_size; + + attr.dst = pdata->netdev->dev_addr; + attr.max_size = size - ETH_FCS_LEN; + + return __xgbe_test_loopback(pdata, &attr); +} + static const struct xgbe_test xgbe_selftests[] = { { .name = "MAC Loopback ", @@ -228,6 +247,10 @@ static const struct xgbe_test xgbe_selftests[] = { .name = "Split Header ", .lb = XGBE_LOOPBACK_PHY, .fn = xgbe_test_sph, + }, { + .name = "Jumbo Frame ", + .lb = XGBE_LOOPBACK_PHY, + .fn = xgbe_test_jumbo, }, }; From 0cc4b846159184892f4210c440daeb97cbe9583a Mon Sep 17 00:00:00 2001 From: Aswin Karuvally Date: Mon, 3 Nov 2025 11:16:52 +0100 Subject: [PATCH 604/867] s390/ctcm: Use info level for handshake UC_RCRESET CTC adapter throws CTC_EVENT_UC_RCRESET (Unit check remote reset event) during initial handshake, if the peer is not ready yet. This causes the ctcm driver to re-attempt the handshake. 
As it is normal for the event to occur during initialization, use info instead of warn level in kernel log and NOTICE instead of ERROR level in s390 debug feature. Also reword the log message for clarity. Reviewed-by: Alexandra Winter Signed-off-by: Aswin Karuvally Link: https://patch.msgid.link/20251103101652.2349855-1-aswin@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/s390/net/ctcm_fsms.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c index 9678c6a2cda72..1a48258b63b28 100644 --- a/drivers/s390/net/ctcm_fsms.c +++ b/drivers/s390/net/ctcm_fsms.c @@ -882,6 +882,13 @@ static void ctcm_chx_rxiniterr(fsm_instance *fi, int event, void *arg) fsm_newstate(fi, CTC_STATE_RXERR); fsm_event(priv->fsm, DEV_EVENT_RXDOWN, dev); } + } else if (event == CTC_EVENT_UC_RCRESET) { + CTCM_DBF_TEXT_(TRACE, CTC_DBF_NOTICE, + "%s(%s): %s in %s", CTCM_FUNTAIL, ch->id, + ctc_ch_event_names[event], fsm_getstate_str(fi)); + + dev_info(&dev->dev, + "Init handshake not received, peer not ready yet\n"); } else { CTCM_DBF_TEXT_(ERROR, CTC_DBF_ERROR, "%s(%s): %s in %s", CTCM_FUNTAIL, ch->id, @@ -967,6 +974,13 @@ static void ctcm_chx_txiniterr(fsm_instance *fi, int event, void *arg) fsm_newstate(fi, CTC_STATE_TXERR); fsm_event(priv->fsm, DEV_EVENT_TXDOWN, dev); } + } else if (event == CTC_EVENT_UC_RCRESET) { + CTCM_DBF_TEXT_(TRACE, CTC_DBF_NOTICE, + "%s(%s): %s in %s", CTCM_FUNTAIL, ch->id, + ctc_ch_event_names[event], fsm_getstate_str(fi)); + + dev_info(&dev->dev, + "Init handshake not sent, peer not ready yet\n"); } else { CTCM_DBF_TEXT_(ERROR, CTC_DBF_ERROR, "%s(%s): %s in %s", CTCM_FUNTAIL, ch->id, From 13068e9d57264d0a86b8195817a01155ba33d230 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Mon, 3 Nov 2025 14:46:30 -0800 Subject: [PATCH 605/867] idpf: add support for IDPF PCI programming interface At present IDPF supports only 0x1452 and 0x145C as PF and VF device IDs on our current generation 
hardware. Future hardware exposes a new set of device IDs for each generation. To avoid adding a new device ID for each generation and to make the driver forward and backward compatible, make use of the IDPF PCI programming interface to load the driver. Write and read the VF_ARQBAL mailbox register to find if the current device is a PF or a VF. PCI SIG allocated a new programming interface for the IDPF compliant ethernet network controller devices. It can be found at: https://members.pcisig.com/wg/PCI-SIG/document/20113 with the document titled as 'PCI Code and ID Assignment Revision 1.16' or any latest revisions. Tested this patch by doing a simple driver load/unload on Intel IPU E2000 hardware which supports 0x1452 and 0x145C device IDs and new hardware which supports the IDPF PCI programming interface. Reviewed-by: Sridhar Samudrala Reviewed-by: Simon Horman Signed-off-by: Pavan Kumar Linga Signed-off-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Tested-by: Marek Landowski Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20251103224631.595527-1-anthony.l.nguyen@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/idpf/idpf_main.c | 105 ++++++++++++++++---- 1 file changed, 88 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index 8c46481d2e1f4..7a06eaf46a08e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -3,15 +3,93 @@ #include "idpf.h" #include "idpf_devids.h" +#include "idpf_lan_vf_regs.h" #include "idpf_virtchnl.h" #define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver" +#define IDPF_NETWORK_ETHERNET_PROGIF 0x01 +#define IDPF_CLASS_NETWORK_ETHERNET_PROGIF \ + (PCI_CLASS_NETWORK_ETHERNET << 8 | IDPF_NETWORK_ETHERNET_PROGIF) +#define IDPF_VF_TEST_VAL 0xfeed0000u + MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_IMPORT_NS("LIBETH"); MODULE_IMPORT_NS("LIBETH_XDP"); 
MODULE_LICENSE("GPL"); +/** + * idpf_get_device_type - Helper to find if it is a VF or PF device + * @pdev: PCI device information struct + * + * Return: PF/VF device ID or -%errno on failure. + */ +static int idpf_get_device_type(struct pci_dev *pdev) +{ + void __iomem *addr; + int ret; + + addr = ioremap(pci_resource_start(pdev, 0) + VF_ARQBAL, 4); + if (!addr) { + pci_err(pdev, "Failed to allocate BAR0 mbx region\n"); + return -EIO; + } + + writel(IDPF_VF_TEST_VAL, addr); + if (readl(addr) == IDPF_VF_TEST_VAL) + ret = IDPF_DEV_ID_VF; + else + ret = IDPF_DEV_ID_PF; + + iounmap(addr); + + return ret; +} + +/** + * idpf_dev_init - Initialize device specific parameters + * @adapter: adapter to initialize + * @ent: entry in idpf_pci_tbl + * + * Return: %0 on success, -%errno on failure. + */ +static int idpf_dev_init(struct idpf_adapter *adapter, + const struct pci_device_id *ent) +{ + int ret; + + if (ent->class == IDPF_CLASS_NETWORK_ETHERNET_PROGIF) { + ret = idpf_get_device_type(adapter->pdev); + switch (ret) { + case IDPF_DEV_ID_VF: + idpf_vf_dev_ops_init(adapter); + adapter->crc_enable = true; + break; + case IDPF_DEV_ID_PF: + idpf_dev_ops_init(adapter); + break; + default: + return ret; + } + + return 0; + } + + switch (ent->device) { + case IDPF_DEV_ID_PF: + idpf_dev_ops_init(adapter); + break; + case IDPF_DEV_ID_VF: + idpf_vf_dev_ops_init(adapter); + adapter->crc_enable = true; + break; + default: + return -ENODEV; + } + + return 0; +} + /** * idpf_remove - Device removal routine * @pdev: PCI device information struct @@ -165,21 +243,6 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->req_tx_splitq = true; adapter->req_rx_splitq = true; - switch (ent->device) { - case IDPF_DEV_ID_PF: - idpf_dev_ops_init(adapter); - break; - case IDPF_DEV_ID_VF: - idpf_vf_dev_ops_init(adapter); - adapter->crc_enable = true; - break; - default: - err = -ENODEV; - dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n", - ent->device); 
- goto err_free; - } - adapter->pdev = pdev; err = pcim_enable_device(pdev); if (err) @@ -259,11 +322,18 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* setup msglvl */ adapter->msg_enable = netif_msg_init(-1, IDPF_AVAIL_NETIF_M); + err = idpf_dev_init(adapter, ent); + if (err) { + dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n", + ent->device); + goto destroy_vc_event_wq; + } + err = idpf_cfg_hw(adapter); if (err) { dev_err(dev, "Failed to configure HW structure for adapter: %d\n", err); - goto err_cfg_hw; + goto destroy_vc_event_wq; } mutex_init(&adapter->vport_ctrl_lock); @@ -284,7 +354,7 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -err_cfg_hw: +destroy_vc_event_wq: destroy_workqueue(adapter->vc_event_wq); err_vc_event_wq_alloc: destroy_workqueue(adapter->stats_wq); @@ -304,6 +374,7 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static const struct pci_device_id idpf_pci_tbl[] = { { PCI_VDEVICE(INTEL, IDPF_DEV_ID_PF)}, { PCI_VDEVICE(INTEL, IDPF_DEV_ID_VF)}, + { PCI_DEVICE_CLASS(IDPF_CLASS_NETWORK_ETHERNET_PROGIF, ~0)}, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(pci, idpf_pci_tbl); From 96baf482ca1f69f0da9d10a5bd8422c87ea9039e Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 4 Nov 2025 19:37:41 -0800 Subject: [PATCH 606/867] net: dsa: microchip: Fix reserved multicast address table programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KSZ9477/KSZ9897 and LAN937X families of switches use a reserved multicast address table for some specific forwarding with some multicast addresses, like the one used in STP. The hardware assumes the host port is the last port in KSZ9897 family and port 5 in LAN937X family. Most of the time this assumption is correct but not in other cases like KSZ9477. 
Originally the function just sets up the first entry, but the others still need updating, especially for one common multicast address that is used by PTP operation. LAN937x also uses different register bits when accessing the reserved table. Fixes: 457c182af597 ("net: dsa: microchip: generic access to ksz9477 static and reserved table") Signed-off-by: Tristram Ha Tested-by: Łukasz Majewski Link: https://patch.msgid.link/20251105033741.6455-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 98 +++++++++++++++++++++---- drivers/net/dsa/microchip/ksz9477_reg.h | 3 +- drivers/net/dsa/microchip/ksz_common.c | 4 + drivers/net/dsa/microchip/ksz_common.h | 2 + 4 files changed, 91 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index d747ea1c41a79..5df8f153d511b 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1355,9 +1355,15 @@ void ksz9477_config_cpu_port(struct dsa_switch *ds) } } +#define RESV_MCAST_CNT 8 + +static u8 reserved_mcast_map[RESV_MCAST_CNT] = { 0, 1, 3, 16, 32, 33, 2, 17 }; + int ksz9477_enable_stp_addr(struct ksz_device *dev) { + u8 i, ports, update; const u32 *masks; + bool override; u32 data; int ret; @@ -1366,23 +1372,87 @@ int ksz9477_enable_stp_addr(struct ksz_device *dev) /* Enable Reserved multicast table */ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_RESV_MCAST_ENABLE, true); - /* Set the Override bit for forwarding BPDU packet to CPU */ - ret = ksz_write32(dev, REG_SW_ALU_VAL_B, - ALU_V_OVERRIDE | BIT(dev->cpu_port)); - if (ret < 0) - return ret; + /* The reserved multicast address table has 8 entries. Each entry has + * a default value of which port to forward. It is assumed the host + * port is the last port in most of the switches, but that is not the + * case for KSZ9477 or maybe KSZ9897. For LAN937X family the default + * port is port 5, the first RGMII port. 
It is okay for LAN9370, a + * 5-port switch, but may not be correct for the other 8-port + * versions. It is necessary to update the whole table to forward to + * the right ports. + * Furthermore PTP messages can use a reserved multicast address and + * the host will not receive them if this table is not correct. + */ + for (i = 0; i < RESV_MCAST_CNT; i++) { + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_READ]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; - data = ALU_STAT_START | ALU_RESV_MCAST_ADDR | masks[ALU_STAT_WRITE]; + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; - ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); - if (ret < 0) - return ret; + ret = ksz_read32(dev, REG_SW_ALU_VAL_B, &data); + if (ret < 0) + return ret; - /* wait to be finished */ - ret = ksz9477_wait_alu_sta_ready(dev); - if (ret < 0) { - dev_err(dev->dev, "Failed to update Reserved Multicast table\n"); - return ret; + override = false; + ports = data & dev->port_mask; + switch (i) { + case 0: + case 6: + /* Change the host port. */ + update = BIT(dev->cpu_port); + override = true; + break; + case 2: + /* Change the host port. */ + update = BIT(dev->cpu_port); + break; + case 4: + case 5: + case 7: + /* Skip the host port. */ + update = dev->port_mask & ~BIT(dev->cpu_port); + break; + default: + update = ports; + break; + } + if (update != ports || override) { + data &= ~dev->port_mask; + data |= update; + /* Set Override bit to receive frame even when port is + * closed. 
+ */ + if (override) + data |= ALU_V_OVERRIDE; + ret = ksz_write32(dev, REG_SW_ALU_VAL_B, data); + if (ret < 0) + return ret; + + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_WRITE]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; + + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; + } } return 0; diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index ff579920078ee..61ea11e3338e1 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 register definitions * - * Copyright (C) 2017-2024 Microchip Technology Inc. + * Copyright (C) 2017-2025 Microchip Technology Inc. */ #ifndef __KSZ9477_REGS_H @@ -397,7 +397,6 @@ #define ALU_RESV_MCAST_INDEX_M (BIT(6) - 1) #define ALU_STAT_START BIT(7) -#define ALU_RESV_MCAST_ADDR BIT(1) #define REG_SW_ALU_VAL_A 0x0420 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index a962055bfdbd8..933ae8dc63378 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -808,6 +808,8 @@ static const u16 ksz9477_regs[] = { static const u32 ksz9477_masks[] = { [ALU_STAT_WRITE] = 0, [ALU_STAT_READ] = 1, + [ALU_STAT_DIRECT] = 0, + [ALU_RESV_MCAST_ADDR] = BIT(1), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; @@ -835,6 +837,8 @@ static const u8 ksz9477_xmii_ctrl1[] = { static const u32 lan937x_masks[] = { [ALU_STAT_WRITE] = 1, [ALU_STAT_READ] = 2, + [ALU_STAT_DIRECT] = BIT(3), + [ALU_RESV_MCAST_ADDR] = BIT(2), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index a1eb39771bb99..c65188cd3c0a0 100644 --- 
a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -294,6 +294,8 @@ enum ksz_masks { DYNAMIC_MAC_TABLE_TIMESTAMP, ALU_STAT_WRITE, ALU_STAT_READ, + ALU_STAT_DIRECT, + ALU_RESV_MCAST_ADDR, P_MII_TX_FLOW_CTRL, P_MII_RX_FLOW_CTRL, }; From 067bf016e99ad72aa4ff869d6dec1fd62a9c6202 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 5 Nov 2025 07:26:20 +0000 Subject: [PATCH 607/867] bonding: fix NULL pointer dereference in actor_port_prio setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Liang reported an issue where setting a slave’s actor_port_prio to predefined values such as 0, 255, or 65535 would cause a system crash. The problem occurs because in bond_opt_parse(), when the provided value matches a predefined table entry, the function returns that table entry, which does not contain slave information. Later, in bond_option_actor_port_prio_set(), calling bond_slave_get_rtnl() leads to a NULL pointer dereference. Since actor_port_prio is defined as a u16 and initialized to the default value of 255 in ad_initialize_port(), there is no need for the bond_actor_port_prio_tbl. Using the BOND_OPTFLAG_RAWVAL flag is sufficient. 
Fixes: 6b6dc81ee7e8 ("bonding: add support for per-port LACP actor priority") Reported-by: Liang Li Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251105072620.164841-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_options.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 495a87f2ea7c8..384499c869b8d 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -225,13 +225,6 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, }; -static const struct bond_opt_value bond_actor_port_prio_tbl[] = { - { "minval", 0, BOND_VALFLAG_MIN}, - { "maxval", 65535, BOND_VALFLAG_MAX}, - { "default", 255, BOND_VALFLAG_DEFAULT}, - { NULL, -1, 0}, -}; - static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", 1023, BOND_VALFLAG_MAX}, @@ -497,7 +490,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .id = BOND_OPT_ACTOR_PORT_PRIO, .name = "actor_port_prio", .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), - .values = bond_actor_port_prio_tbl, + .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_actor_port_prio_set, }, [BOND_OPT_AD_ACTOR_SYSTEM] = { From 0216721ce71252f60d89af49c8dff613358058d3 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 5 Nov 2025 08:49:55 +0100 Subject: [PATCH 608/867] lan966x: Fix sleeping in atomic context The following warning was seen when we try to connect using ssh to the device. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 104, name: dropbear preempt_count: 1, expected: 0 INFO: lockdep is turned off. 
CPU: 0 UID: 0 PID: 104 Comm: dropbear Tainted: G W 6.18.0-rc2-00399-g6f1ab1b109b9-dirty #530 NONE Tainted: [W]=WARN Hardware name: Generic DT based system Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x7c/0xac dump_stack_lvl from __might_resched+0x16c/0x2b0 __might_resched from __mutex_lock+0x64/0xd34 __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from lan966x_stats_get+0x5c/0x558 lan966x_stats_get from dev_get_stats+0x40/0x43c dev_get_stats from dev_seq_printf_stats+0x3c/0x184 dev_seq_printf_stats from dev_seq_show+0x10/0x30 dev_seq_show from seq_read_iter+0x350/0x4ec seq_read_iter from seq_read+0xfc/0x194 seq_read from proc_reg_read+0xac/0x100 proc_reg_read from vfs_read+0xb0/0x2b0 vfs_read from ksys_read+0x6c/0xec ksys_read from ret_fast_syscall+0x0/0x1c Exception stack(0xf0b11fa8 to 0xf0b11ff0) 1fa0: 00000001 00001000 00000008 be9048d8 00001000 00000001 1fc0: 00000001 00001000 00000008 00000003 be905920 0000001e 00000000 00000001 1fe0: 0005404c be9048c0 00018684 b6ec2cd8 It seems that we are using a mutex in an atomic context which is wrong. Replace the mutex with a spinlock. 
Fixes: 12c2d0a5b8e2 ("net: lan966x: add ethtool configuration and statistics") Signed-off-by: Horatiu Vultur Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251105074955.1766792-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_ethtool.c | 18 +++++++++--------- .../ethernet/microchip/lan966x/lan966x_main.c | 2 -- .../ethernet/microchip/lan966x/lan966x_main.h | 4 ++-- .../microchip/lan966x/lan966x_vcap_impl.c | 8 ++++---- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index 2474dfd330f46..fe4e614052840 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -294,7 +294,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) { int i, j; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); for (i = 0; i < lan966x->num_phys_ports; i++) { uint idx = i * lan966x->num_stats; @@ -310,7 +310,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) } } - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static int lan966x_get_sset_count(struct net_device *dev, int sset) @@ -365,7 +365,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); mac_stats->FramesTransmittedOK = lan966x->stats[idx + SYS_COUNT_TX_UC] + @@ -416,7 +416,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_RX_LONG] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_LONG]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static const struct ethtool_rmon_hist_range lan966x_rmon_ranges[] = { @@ -442,7 +442,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, idx = port->chip_port * 
lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); rmon_stats->undersize_pkts = lan966x->stats[idx + SYS_COUNT_RX_SHORT] + @@ -500,7 +500,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_TX_SZ_1024_1526] + lan966x->stats[idx + SYS_COUNT_TX_PMAC_SZ_1024_1526]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); *ranges = lan966x_rmon_ranges; } @@ -603,7 +603,7 @@ void lan966x_stats_get(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); stats->rx_bytes = lan966x->stats[idx + SYS_COUNT_RX_OCT] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_OCT]; @@ -685,7 +685,7 @@ void lan966x_stats_get(struct net_device *dev, stats->collisions = lan966x->stats[idx + SYS_COUNT_TX_COL]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } int lan966x_stats_init(struct lan966x *lan966x) @@ -701,7 +701,7 @@ int lan966x_stats_init(struct lan966x *lan966x) return -ENOMEM; /* Init stats worker */ - mutex_init(&lan966x->stats_lock); + spin_lock_init(&lan966x->stats_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(lan966x->dev)); lan966x->stats_queue = create_singlethread_workqueue(queue_name); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 7001584f1b7a6..47752d3fde0b1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1261,7 +1261,6 @@ static int lan966x_probe(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); debugfs_remove_recursive(lan966x->debugfs_root); @@ -1279,7 +1278,6 @@ static void lan966x_remove(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); 
destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); lan966x_mac_purge_entries(lan966x); lan966x_mdb_deinit(lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 4f75f06883693..eea286c29474f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -295,8 +295,8 @@ struct lan966x { const struct lan966x_stat_layout *stats_layout; u32 num_stats; - /* workqueue for reading stats */ - struct mutex stats_lock; + /* lock for reading stats */ + spinlock_t stats_lock; u64 *stats; struct delayed_work stats_work; struct workqueue_struct *stats_queue; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index a1471e38d1189..2a37fc1ba4bcd 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -403,11 +403,11 @@ static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x, u32 counter; id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) + lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); if (counter) admin->cache.counter = counter; } @@ -417,14 +417,14 @@ static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x, { id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES)); lan_wr(admin->cache.counter, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)); lan_wr(0, lan966x, 
SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static void lan966x_vcap_cache_write(struct net_device *dev, From 8dca36978aa80bab9d4da130c211db75c9e00048 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Nov 2025 13:19:18 +0200 Subject: [PATCH 609/867] net: bridge: fix use-after-free due to MST port state bypass syzbot reported[1] a use-after-free when deleting an expired fdb. It is due to a race condition between learning still happening and a port being deleted, after all its fdbs have been flushed. The port's state has been toggled to disabled so no learning should happen at that time, but if we have MST enabled, it will bypass the port's state, that together with VLAN filtering disabled can lead to fdb learning at a time when it shouldn't happen while the port is being deleted. VLAN filtering must be disabled because we flush the port VLANs when it's being deleted which will stop learning. This fix adds a check for the port's vlan group which is initialized to NULL when the port is getting deleted, that avoids the port state bypass. When MST is enabled there would be a minimal new overhead in the fast-path because the port's vlan group pointer is cache-hot. 
[1] https://syzkaller.appspot.com/bug?extid=dd280197f0f7ab3917be Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Reported-by: syzbot+dd280197f0f7ab3917be@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69088ffa.050a0220.29fc44.003d.GAE@google.com/ Signed-off-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251105111919.1499702-2-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_forward.c | 2 +- net/bridge/br_input.c | 4 ++-- net/bridge/br_private.h | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 870bdf2e082c4..dea09096ad0fb 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,7 +25,7 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && + (br_mst_is_enabled(p) || p->state == BR_STATE_FORWARDING) && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 67b4c905e49a2..777fa869c1a14 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -94,7 +94,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb br = p->br; - if (br_mst_is_enabled(br)) { + if (br_mst_is_enabled(p)) { state = BR_STATE_FORWARDING; } else { if (p->state == BR_STATE_DISABLED) { @@ -429,7 +429,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; forward: - if (br_mst_is_enabled(p->br)) + if (br_mst_is_enabled(p)) goto defer_stp_filtering; switch (p->state) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 16be5d2504023..b571d6f613890 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1935,10 +1935,12 @@ 
static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) /* br_mst.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING DECLARE_STATIC_KEY_FALSE(br_mst_used); -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { + /* check the port's vlan group to avoid racing with port deletion */ return static_branch_unlikely(&br_mst_used) && - br_opt_get(br, BROPT_MST_ENABLED); + br_opt_get(p->br, BROPT_MST_ENABLED) && + rcu_access_pointer(p->vlgrp); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, @@ -1953,7 +1955,7 @@ int br_mst_fill_info(struct sk_buff *skb, int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); #else -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { return false; } From ee87c63f9b2a418f698d79c2991347e31a7d2c27 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Nov 2025 13:19:19 +0200 Subject: [PATCH 610/867] net: bridge: fix MST static key usage As Ido pointed out, the static key usage in MST is buggy and should use inc/dec instead of enable/disable because we can have multiple bridges with MST enabled which means a single bridge can disable MST for all. Use static_branch_inc/dec to avoid that. When destroying a bridge decrement the key if MST was enabled. 
Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Reported-by: Ido Schimmel Closes: https://lore.kernel.org/netdev/20251104120313.1306566-1-razor@blackwall.org/T/#m6888d87658f94ed1725433940f4f4ebb00b5a68b Signed-off-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251105111919.1499702-3-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_if.c | 1 + net/bridge/br_mst.c | 10 ++++++++-- net/bridge/br_private.h | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 98c5b9c3145f3..ca3a637d7cca7 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,6 +386,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_mst_uninit(br); br_recalculate_neigh_suppress_enabled(br); br_fdb_delete_by_port(br, NULL, 0, 1); diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3f24b4ee49c27..43a300ae6bfaf 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -22,6 +22,12 @@ bool br_mst_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_mst_enabled); +void br_mst_uninit(struct net_bridge *br) +{ + if (br_opt_get(br, BROPT_MST_ENABLED)) + static_branch_dec(&br_mst_used); +} + int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) { const struct net_bridge_vlan_group *vg; @@ -225,9 +231,9 @@ int br_mst_set_enabled(struct net_bridge *br, bool on, return err; if (on) - static_branch_enable(&br_mst_used); + static_branch_inc(&br_mst_used); else - static_branch_disable(&br_mst_used); + static_branch_dec(&br_mst_used); br_opt_toggle(br, BROPT_MST_ENABLED, on); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b571d6f613890..7280c4e9305f3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1954,6 +1954,7 @@ int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg); int br_mst_process(struct 
net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); +void br_mst_uninit(struct net_bridge *br); #else static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { @@ -1989,6 +1990,10 @@ static inline int br_mst_process(struct net_bridge_port *p, { return -EOPNOTSUPP; } + +static inline void br_mst_uninit(struct net_bridge *br) +{ +} #endif struct nf_br_ops { From 3534e03e0ec2e00908765549828a69df5ebefb91 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Wed, 5 Nov 2025 07:59:19 -0800 Subject: [PATCH 611/867] selftests/vsock: avoid false-positives when checking dmesg Sometimes VMs will have some intermittent dmesg warnings that are unrelated to vsock. Change the dmesg parsing to filter on strings containing 'vsock' to avoid false positive failures that are unrelated to vsock. The downside is that it is possible for some vsock related warnings to not contain the substring 'vsock', so those will be missed. Fixes: a4a65c6fe08b ("selftests/vsock: add initial vmtest.sh for vsock") Reviewed-by: Simon Horman Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20251105-vsock-vmtest-dmesg-fix-v2-1-1a042a14892c@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index edacebfc16325..8ceeb8a7894f5 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -389,9 +389,9 @@ run_test() { local rc host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') - host_warn_cnt_before=$(dmesg --level=warn | wc -l) + host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock') vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') - vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') name=$(echo "${1}" 
| awk '{ print $1 }') eval test_"${name}" @@ -403,7 +403,7 @@ run_test() { rc=$KSFT_FAIL fi - host_warn_cnt_after=$(dmesg --level=warn | wc -l) + host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock') if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on host" | log_host "${name}" rc=$KSFT_FAIL @@ -415,7 +415,7 @@ run_test() { rc=$KSFT_FAIL fi - vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on vm" | log_host "${name}" rc=$KSFT_FAIL From 9352d40c8bcd2ef29366d2c38b163c0b115039ed Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Sat, 25 Oct 2025 16:08:58 +0300 Subject: [PATCH 612/867] devlink: Add new "max_mac_per_vf" generic device param Add a new device generic parameter to control the maximum number of MAC filters allowed per VF. For example, to limit a VF to 3 MAC addresses: $ devlink dev param set pci/0000:3b:00.0 name max_mac_per_vf \ value 3 \ cmode runtime Signed-off-by: Mohammad Heib Reviewed-by: Simon Horman Signed-off-by: Jacob Keller Signed-off-by: Tony Nguyen --- Documentation/networking/devlink/devlink-params.rst | 4 ++++ include/net/devlink.h | 4 ++++ net/devlink/param.c | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index 0a9c20d701225..c0597d456641d 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -151,3 +151,7 @@ own name. * - ``num_doorbells`` - u32 - Controls the number of doorbells used by the device. + * - ``max_mac_per_vf`` + - u32 + - Controls the maximum number of MAC address filters that can be assigned + to a Virtual Function (VF). 
diff --git a/include/net/devlink.h b/include/net/devlink.h index 9e824f61e40f6..d01046ef0577c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -532,6 +532,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS, + DEVLINK_PARAM_GENERIC_ID_MAX_MAC_PER_VF, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -602,6 +603,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_NAME "num_doorbells" #define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_NAME "max_mac_per_vf" +#define DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/devlink/param.c b/net/devlink/param.c index 70e69523412cb..6b233b13b69ae 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -112,6 +112,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_NAME, .type = DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_MAX_MAC_PER_VF, + .name = DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_NAME, + .type = DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) From 2c031d4c772f3a9191d04d57a3403ad6a56375c7 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Sat, 25 Oct 2025 16:08:59 +0300 Subject: [PATCH 613/867] i40e: support generic devlink param "max_mac_per_vf" Currently the i40e driver enforces its own internally calculated per-VF MAC filter limit, derived from the number of allocated VFs and available hardware resources. This limit is not configurable by the administrator, which makes it difficult to control how many MAC addresses each VF may use. 
This patch adds support for the new generic devlink runtime parameter "max_mac_per_vf" which provides administrators with a way to cap the number of MAC addresses a VF can use: - When the parameter is set to 0 (default), the driver continues to use its internally calculated limit. - When set to a non-zero value, the driver applies this value as a strict cap for VFs, overriding the internal calculation. Important notes: - The configured value is a theoretical maximum. Hardware limits may still prevent additional MAC addresses from being added, even if the parameter allows it. - Since MAC filters are a shared hardware resource across all VFs, setting a high value may cause resource contention and starve other VFs. - This change gives administrators predictable and flexible control over VF resource allocation, while still respecting hardware limitations. - Previous discussion about this change: https://lore.kernel.org/netdev/20250805134042.2604897-2-dhill@redhat.com https://lore.kernel.org/netdev/20250823094952.182181-1-mheib@redhat.com Signed-off-by: Mohammad Heib Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Signed-off-by: Tony Nguyen --- Documentation/networking/devlink/i40e.rst | 34 ++++++++++++ drivers/net/ethernet/intel/i40e/i40e.h | 4 ++ .../net/ethernet/intel/i40e/i40e_devlink.c | 54 ++++++++++++++++++- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 31 ++++++++--- 4 files changed, 113 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/devlink/i40e.rst b/Documentation/networking/devlink/i40e.rst index d3cb5bb5197e9..51c887f0dc833 100644 --- a/Documentation/networking/devlink/i40e.rst +++ b/Documentation/networking/devlink/i40e.rst @@ -7,6 +7,40 @@ i40e devlink support This document describes the devlink features implemented by the ``i40e`` device driver. +Parameters +========== + +.. 
list-table:: Generic parameters implemented + :widths: 5 5 90 + + * - Name + - Mode + - Notes + * - ``max_mac_per_vf`` + - runtime + - Controls the maximum number of MAC addresses a VF can use + on i40e devices. + + By default (``0``), the driver enforces its internally calculated per-VF + MAC filter limit, which is based on the number of allocated VFs. + + If set to a non-zero value, this parameter acts as a strict cap: + the driver will use the user-provided value instead of its internal + calculation. + + **Important notes:** + + - This value **must be set before enabling SR-IOV**. + Attempting to change it while SR-IOV is enabled will return an error. + - MAC filters are a **shared hardware resource** across all VFs. + Setting a high value may cause other VFs to be starved of filters. + - This value is an **administrative policy**. The hardware may return + errors when its absolute limit is reached, regardless of the value + set here. + + The default value is ``0`` (internal calculation is used). + + Info versions ============= diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 801a57a925dad..d2d03db2acec6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -574,6 +574,10 @@ struct i40e_pf { struct i40e_vf *vf; int num_alloc_vfs; /* actual number of VFs allocated */ u32 vf_aq_requests; + /* If set to non-zero, the device uses this value + * as maximum number of MAC filters per VF. 
+ */ + u32 max_mac_per_vf; u32 arq_overflows; /* Not fatal, possibly indicative of problems */ struct ratelimit_state mdd_message_rate_limit; /* DCBx/DCBNL capability for PF that indicates diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c index cc4e9e2addb75..bc205e3077c7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c @@ -5,6 +5,41 @@ #include "i40e.h" #include "i40e_devlink.h" +static int i40e_max_mac_per_vf_set(struct devlink *devlink, + u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct i40e_pf *pf = devlink_priv(devlink); + + if (pf->num_alloc_vfs > 0) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot change max_mac_per_vf while SR-IOV is enabled"); + return -EBUSY; + } + + pf->max_mac_per_vf = ctx->val.vu32; + return 0; +} + +static int i40e_max_mac_per_vf_get(struct devlink *devlink, + u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct i40e_pf *pf = devlink_priv(devlink); + + ctx->val.vu32 = pf->max_mac_per_vf; + return 0; +} + +static const struct devlink_param i40e_dl_params[] = { + DEVLINK_PARAM_GENERIC(MAX_MAC_PER_VF, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + i40e_max_mac_per_vf_get, + i40e_max_mac_per_vf_set, + NULL), +}; + static void i40e_info_get_dsn(struct i40e_pf *pf, char *buf, size_t len) { u8 dsn[8]; @@ -165,7 +200,18 @@ void i40e_free_pf(struct i40e_pf *pf) **/ void i40e_devlink_register(struct i40e_pf *pf) { - devlink_register(priv_to_devlink(pf)); + struct devlink *dl = priv_to_devlink(pf); + struct device *dev = &pf->pdev->dev; + int err; + + err = devlink_params_register(dl, i40e_dl_params, + ARRAY_SIZE(i40e_dl_params)); + if (err) + dev_err(dev, + "devlink params register failed with error %d", err); + + devlink_register(dl); + } /** @@ -176,7 +222,11 @@ void i40e_devlink_register(struct i40e_pf *pf) **/ void i40e_devlink_unregister(struct i40e_pf *pf) { - 
devlink_unregister(priv_to_devlink(pf)); + struct devlink *dl = priv_to_devlink(pf); + + devlink_unregister(dl); + devlink_params_unregister(dl, i40e_dl_params, + ARRAY_SIZE(i40e_dl_params)); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 0fe0d52c796b3..9d91a382612dc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2935,33 +2935,48 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, if (!f) ++mac_add_cnt; } - - /* If this VF is not privileged, then we can't add more than a limited - * number of addresses. + /* Determine the maximum number of MAC addresses this VF may use. + * + * - For untrusted VFs: use a fixed small limit. + * + * - For trusted VFs: limit is calculated by dividing total MAC + * filter pool across all VFs/ports. * - * If this VF is trusted, it can use more resources than untrusted. - * However to ensure that every trusted VF has appropriate number of - * resources, divide whole pool of resources per port and then across - * all VFs. + * - User can override this by devlink param "max_mac_per_vf". + * If set its value is used as a strict cap for both trusted and + * untrusted VFs. + * Note: + * even when overridden, this is a theoretical maximum; hardware + * may reject additional MACs if the absolute HW limit is reached. */ if (!vf_trusted) mac_add_max = I40E_VC_MAX_MAC_ADDR_PER_VF; else mac_add_max = I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs, hw->num_ports); + if (pf->max_mac_per_vf > 0) + mac_add_max = pf->max_mac_per_vf; + /* VF can replace all its filters in one step, in this case mac_add_max * will be added as active and another mac_add_max will be in * a to-be-removed state. Account for that. 
*/ if ((i40e_count_active_filters(vsi) + mac_add_cnt) > mac_add_max || (i40e_count_all_filters(vsi) + mac_add_cnt) > 2 * mac_add_max) { + if (pf->max_mac_per_vf == mac_add_max && mac_add_max > 0) { + dev_err(&pf->pdev->dev, + "Cannot add more MAC addresses: VF reached its maximum allowed limit (%d)\n", + mac_add_max); + return -EPERM; + } if (!vf_trusted) { dev_err(&pf->pdev->dev, "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n"); return -EPERM; } else { dev_err(&pf->pdev->dev, - "Cannot add more MAC addresses, trusted VF exhausted it's resources\n"); + "Cannot add more MAC addresses: trusted VF reached its maximum allowed limit (%d)\n", + mac_add_max); return -EPERM; } } From 322a1e6f3d687d3b116837036299abeb4d79ba76 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:18:41 +0000 Subject: [PATCH 614/867] net: dsa: lantiq_gswip: split into common and MMIO parts Move all parts specific for the MMIO/SoC driver into a module of its own to prepare for supporting MDIO-connected switch ICs. Modify the gswip_probe() function by splitting it into a common function gswip_probe_common() which covers allocating, initializing and registering the DSA switch, while keeping transport-specific regmap initialization as well as PHY firmware loading in the new MMIO/SoC-specific gswip_probe() function. 
Signed-off-by: Daniel Golle Tested-by: Alexander Sverdlin Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/dc7da5b65ec220ba8e9bc4bd04fe1ed7de046656.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/Kconfig | 6 +- drivers/net/dsa/lantiq/Makefile | 1 + drivers/net/dsa/lantiq/lantiq_gswip.c | 1617 +---------------- drivers/net/dsa/lantiq/lantiq_gswip.h | 4 + drivers/net/dsa/lantiq/lantiq_gswip_common.c | 1622 ++++++++++++++++++ 5 files changed, 1658 insertions(+), 1592 deletions(-) create mode 100644 drivers/net/dsa/lantiq/lantiq_gswip_common.c diff --git a/drivers/net/dsa/lantiq/Kconfig b/drivers/net/dsa/lantiq/Kconfig index 3cfa16840cf59..78db82a47d092 100644 --- a/drivers/net/dsa/lantiq/Kconfig +++ b/drivers/net/dsa/lantiq/Kconfig @@ -1,8 +1,12 @@ +config NET_DSA_LANTIQ_COMMON + tristate + select REGMAP + config NET_DSA_LANTIQ_GSWIP tristate "Lantiq / Intel GSWIP" depends on HAS_IOMEM select NET_DSA_TAG_GSWIP - select REGMAP + select NET_DSA_LANTIQ_COMMON help This enables support for the Lantiq / Intel GSWIP 2.1 found in the xrx200 / VR9 SoC. diff --git a/drivers/net/dsa/lantiq/Makefile b/drivers/net/dsa/lantiq/Makefile index 849f85ebebd6c..65ffa7bb71aa7 100644 --- a/drivers/net/dsa/lantiq/Makefile +++ b/drivers/net/dsa/lantiq/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o +obj-$(CONFIG_NET_DSA_LANTIQ_COMMON) += lantiq_gswip_common.o diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 38f7f6352e8d0..57dd063c07403 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -2,1226 +2,32 @@ /* * Lantiq / Intel GSWIP switch driver for VRX200, xRX300 and xRX330 SoCs * - * Copyright (C) 2010 Lantiq Deutschland - * Copyright (C) 2012 John Crispin - * Copyright (C) 2017 - 2019 Hauke Mehrtens - * - * The VLAN and bridge model the GSWIP hardware uses does not directly - * matches the model DSA uses. 
- * - * The hardware has 64 possible table entries for bridges with one VLAN - * ID, one flow id and a list of ports for each bridge. All entries which - * match the same flow ID are combined in the mac learning table, they - * act as one global bridge. - * The hardware does not support VLAN filter on the port, but on the - * bridge, this driver converts the DSA model to the hardware. - * - * The CPU gets all the exception frames which do not match any forwarding - * rule and the CPU port is also added to all bridges. This makes it possible - * to handle all the special cases easily in software. - * At the initialization the driver allocates one bridge table entry for - * each switch port which is used when the port is used without an - * explicit bridge. This prevents the frames from being forwarded - * between all LAN ports by default. - */ - -#include "lantiq_gswip.h" -#include "lantiq_pce.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct xway_gphy_match_data { - char *fe_firmware_name; - char *ge_firmware_name; -}; - -struct gswip_pce_table_entry { - u16 index; // PCE_TBL_ADDR.ADDR = pData->table_index - u16 table; // PCE_TBL_CTRL.ADDR = pData->table - u16 key[8]; - u16 val[5]; - u16 mask; - u8 gmap; - bool type; - bool valid; - bool key_mode; -}; - -struct gswip_rmon_cnt_desc { - unsigned int size; - unsigned int offset; - const char *name; -}; - -#define MIB_DESC(_size, _offset, _name) {.size = _size, .offset = _offset, .name = _name} - -static const struct gswip_rmon_cnt_desc gswip_rmon_cnt[] = { - /** Receive Packet Count (only packets that are accepted and not discarded). 
*/ - MIB_DESC(1, 0x1F, "RxGoodPkts"), - MIB_DESC(1, 0x23, "RxUnicastPkts"), - MIB_DESC(1, 0x22, "RxMulticastPkts"), - MIB_DESC(1, 0x21, "RxFCSErrorPkts"), - MIB_DESC(1, 0x1D, "RxUnderSizeGoodPkts"), - MIB_DESC(1, 0x1E, "RxUnderSizeErrorPkts"), - MIB_DESC(1, 0x1B, "RxOversizeGoodPkts"), - MIB_DESC(1, 0x1C, "RxOversizeErrorPkts"), - MIB_DESC(1, 0x20, "RxGoodPausePkts"), - MIB_DESC(1, 0x1A, "RxAlignErrorPkts"), - MIB_DESC(1, 0x12, "Rx64BytePkts"), - MIB_DESC(1, 0x13, "Rx127BytePkts"), - MIB_DESC(1, 0x14, "Rx255BytePkts"), - MIB_DESC(1, 0x15, "Rx511BytePkts"), - MIB_DESC(1, 0x16, "Rx1023BytePkts"), - /** Receive Size 1024-1522 (or more, if configured) Packet Count. */ - MIB_DESC(1, 0x17, "RxMaxBytePkts"), - MIB_DESC(1, 0x18, "RxDroppedPkts"), - MIB_DESC(1, 0x19, "RxFilteredPkts"), - MIB_DESC(2, 0x24, "RxGoodBytes"), - MIB_DESC(2, 0x26, "RxBadBytes"), - MIB_DESC(1, 0x11, "TxAcmDroppedPkts"), - MIB_DESC(1, 0x0C, "TxGoodPkts"), - MIB_DESC(1, 0x06, "TxUnicastPkts"), - MIB_DESC(1, 0x07, "TxMulticastPkts"), - MIB_DESC(1, 0x00, "Tx64BytePkts"), - MIB_DESC(1, 0x01, "Tx127BytePkts"), - MIB_DESC(1, 0x02, "Tx255BytePkts"), - MIB_DESC(1, 0x03, "Tx511BytePkts"), - MIB_DESC(1, 0x04, "Tx1023BytePkts"), - /** Transmit Size 1024-1522 (or more, if configured) Packet Count. 
*/ - MIB_DESC(1, 0x05, "TxMaxBytePkts"), - MIB_DESC(1, 0x08, "TxSingleCollCount"), - MIB_DESC(1, 0x09, "TxMultCollCount"), - MIB_DESC(1, 0x0A, "TxLateCollCount"), - MIB_DESC(1, 0x0B, "TxExcessCollCount"), - MIB_DESC(1, 0x0D, "TxPauseCount"), - MIB_DESC(1, 0x10, "TxDroppedPkts"), - MIB_DESC(2, 0x0E, "TxGoodBytes"), -}; - -static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, - u32 cleared) -{ - u32 val; - - return regmap_read_poll_timeout(priv->gswip, offset, val, - !(val & cleared), 20, 50000); -} - -static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 mask, u32 set, - int port) -{ - int reg_port; - - /* MII_CFG register only exists for MII ports */ - if (!(priv->hw_info->mii_ports & BIT(port))) - return; - - reg_port = port + priv->hw_info->mii_port_reg_offset; - - regmap_write_bits(priv->mii, GSWIP_MII_CFGp(reg_port), mask, - set); -} - -static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 mask, u32 set, - int port) -{ - int reg_port; - - /* MII_PCDU register only exists for MII ports */ - if (!(priv->hw_info->mii_ports & BIT(port))) - return; - - reg_port = port + priv->hw_info->mii_port_reg_offset; - - switch (reg_port) { - case 0: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU0, mask, set); - break; - case 1: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU1, mask, set); - break; - case 5: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU5, mask, set); - break; - } -} - -static int gswip_mdio_poll(struct gswip_priv *priv) -{ - u32 ctrl; - - return regmap_read_poll_timeout(priv->mdio, GSWIP_MDIO_CTRL, ctrl, - !(ctrl & GSWIP_MDIO_CTRL_BUSY), 40, 4000); -} - -static int gswip_mdio_wr(struct mii_bus *bus, int addr, int reg, u16 val) -{ - struct gswip_priv *priv = bus->priv; - int err; - - err = gswip_mdio_poll(priv); - if (err) { - dev_err(&bus->dev, "waiting for MDIO bus busy timed out\n"); - return err; - } - - regmap_write(priv->mdio, GSWIP_MDIO_WRITE, val); - regmap_write(priv->mdio, GSWIP_MDIO_CTRL, - GSWIP_MDIO_CTRL_BUSY | 
GSWIP_MDIO_CTRL_WR | - ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | - (reg & GSWIP_MDIO_CTRL_REGAD_MASK)); - - return 0; -} - -static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) -{ - struct gswip_priv *priv = bus->priv; - u32 val; - int err; - - err = gswip_mdio_poll(priv); - if (err) { - dev_err(&bus->dev, "waiting for MDIO bus busy timed out\n"); - return err; - } - - regmap_write(priv->mdio, GSWIP_MDIO_CTRL, - GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_RD | - ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | - (reg & GSWIP_MDIO_CTRL_REGAD_MASK)); - - err = gswip_mdio_poll(priv); - if (err) { - dev_err(&bus->dev, "waiting for MDIO bus busy timed out\n"); - return err; - } - - err = regmap_read(priv->mdio, GSWIP_MDIO_READ, &val); - if (err) - return err; - - return val; -} - -static int gswip_mdio(struct gswip_priv *priv) -{ - struct device_node *mdio_np, *switch_np = priv->dev->of_node; - struct device *dev = priv->dev; - struct mii_bus *bus; - int err = 0; - - mdio_np = of_get_compatible_child(switch_np, "lantiq,xrx200-mdio"); - if (!mdio_np) - mdio_np = of_get_child_by_name(switch_np, "mdio"); - - if (!of_device_is_available(mdio_np)) - goto out_put_node; - - bus = devm_mdiobus_alloc(dev); - if (!bus) { - err = -ENOMEM; - goto out_put_node; - } - - bus->priv = priv; - bus->read = gswip_mdio_rd; - bus->write = gswip_mdio_wr; - bus->name = "lantiq,xrx200-mdio"; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(priv->dev)); - bus->parent = priv->dev; - - err = devm_of_mdiobus_register(dev, bus, mdio_np); - -out_put_node: - of_node_put(mdio_np); - - return err; -} - -static int gswip_pce_table_entry_read(struct gswip_priv *priv, - struct gswip_pce_table_entry *tbl) -{ - int i; - int err; - u32 crtl; - u32 tmp; - u16 addr_mode = tbl->key_mode ? 
GSWIP_PCE_TBL_CTRL_OPMOD_KSRD : - GSWIP_PCE_TBL_CTRL_OPMOD_ADRD; - - mutex_lock(&priv->pce_table_lock); - - err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_BAS); - if (err) - goto out_unlock; - - regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); - regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK | - GSWIP_PCE_TBL_CTRL_BAS, - tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS); - - err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_BAS); - if (err) - goto out_unlock; - - for (i = 0; i < ARRAY_SIZE(tbl->key); i++) { - err = regmap_read(priv->gswip, GSWIP_PCE_TBL_KEY(i), &tmp); - if (err) - goto out_unlock; - tbl->key[i] = tmp; - } - for (i = 0; i < ARRAY_SIZE(tbl->val); i++) { - err = regmap_read(priv->gswip, GSWIP_PCE_TBL_VAL(i), &tmp); - if (err) - goto out_unlock; - tbl->val[i] = tmp; - } - - err = regmap_read(priv->gswip, GSWIP_PCE_TBL_MASK, &tmp); - if (err) - goto out_unlock; - - tbl->mask = tmp; - err = regmap_read(priv->gswip, GSWIP_PCE_TBL_CTRL, &crtl); - if (err) - goto out_unlock; - - tbl->type = !!(crtl & GSWIP_PCE_TBL_CTRL_TYPE); - tbl->valid = !!(crtl & GSWIP_PCE_TBL_CTRL_VLD); - tbl->gmap = (crtl & GSWIP_PCE_TBL_CTRL_GMAP_MASK) >> 7; - -out_unlock: - mutex_unlock(&priv->pce_table_lock); - - return err; -} - -static int gswip_pce_table_entry_write(struct gswip_priv *priv, - struct gswip_pce_table_entry *tbl) -{ - int i; - int err; - u32 crtl; - u16 addr_mode = tbl->key_mode ? 
GSWIP_PCE_TBL_CTRL_OPMOD_KSWR : - GSWIP_PCE_TBL_CTRL_OPMOD_ADWR; - - mutex_lock(&priv->pce_table_lock); - - err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_BAS); - if (err) { - mutex_unlock(&priv->pce_table_lock); - return err; - } - - regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); - regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK, - tbl->table | addr_mode); - - for (i = 0; i < ARRAY_SIZE(tbl->key); i++) - regmap_write(priv->gswip, GSWIP_PCE_TBL_KEY(i), tbl->key[i]); - - for (i = 0; i < ARRAY_SIZE(tbl->val); i++) - regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(i), tbl->val[i]); - - regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK, - tbl->table | addr_mode); - - regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, tbl->mask); - - regmap_read(priv->gswip, GSWIP_PCE_TBL_CTRL, &crtl); - crtl &= ~(GSWIP_PCE_TBL_CTRL_TYPE | GSWIP_PCE_TBL_CTRL_VLD | - GSWIP_PCE_TBL_CTRL_GMAP_MASK); - if (tbl->type) - crtl |= GSWIP_PCE_TBL_CTRL_TYPE; - if (tbl->valid) - crtl |= GSWIP_PCE_TBL_CTRL_VLD; - crtl |= (tbl->gmap << 7) & GSWIP_PCE_TBL_CTRL_GMAP_MASK; - crtl |= GSWIP_PCE_TBL_CTRL_BAS; - regmap_write(priv->gswip, GSWIP_PCE_TBL_CTRL, crtl); - - err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_BAS); - - mutex_unlock(&priv->pce_table_lock); - - return err; -} - -/* Add the LAN port into a bridge with the CPU port by - * default. This prevents automatic forwarding of - * packages between the LAN ports when no explicit - * bridge is configured. 
- */ -static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) -{ - struct gswip_pce_table_entry vlan_active = {0,}; - struct gswip_pce_table_entry vlan_mapping = {0,}; - int err; - - vlan_active.index = port + 1; - vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; - vlan_active.key[0] = GSWIP_VLAN_UNAWARE_PVID; - vlan_active.val[0] = port + 1 /* fid */; - vlan_active.valid = add; - err = gswip_pce_table_entry_write(priv, &vlan_active); - if (err) { - dev_err(priv->dev, "failed to write active VLAN: %d\n", err); - return err; - } - - if (!add) - return 0; - - vlan_mapping.index = port + 1; - vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - vlan_mapping.val[0] = GSWIP_VLAN_UNAWARE_PVID; - vlan_mapping.val[1] = BIT(port) | dsa_cpu_ports(priv->ds); - vlan_mapping.val[2] = 0; - err = gswip_pce_table_entry_write(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); - return err; - } - - return 0; -} - -static int gswip_port_setup(struct dsa_switch *ds, int port) -{ - struct gswip_priv *priv = ds->priv; - int err; - - if (!dsa_is_cpu_port(ds, port)) { - err = gswip_add_single_port_br(priv, port, true); - if (err) - return err; - } - - return 0; -} - -static int gswip_port_enable(struct dsa_switch *ds, int port, - struct phy_device *phydev) -{ - struct gswip_priv *priv = ds->priv; - - if (!dsa_is_cpu_port(ds, port)) { - u32 mdio_phy = 0; - - if (phydev) - mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK; - - regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_ADDR_MASK, - mdio_phy); - } - - /* RMON Counter Enable for port */ - regmap_write(priv->gswip, GSWIP_BM_PCFGp(port), GSWIP_BM_PCFG_CNTEN); - - /* enable port fetch/store dma & VLAN Modification */ - regmap_set_bits(priv->gswip, GSWIP_FDMA_PCTRLp(port), - GSWIP_FDMA_PCTRL_EN | GSWIP_FDMA_PCTRL_VLANMOD_BOTH); - regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), - GSWIP_SDMA_PCTRL_EN); - - return 0; -} - -static void 
gswip_port_disable(struct dsa_switch *ds, int port) -{ - struct gswip_priv *priv = ds->priv; - - regmap_clear_bits(priv->gswip, GSWIP_FDMA_PCTRLp(port), - GSWIP_FDMA_PCTRL_EN); - regmap_clear_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), - GSWIP_SDMA_PCTRL_EN); -} - -static int gswip_pce_load_microcode(struct gswip_priv *priv) -{ - int i; - int err; - - regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_ADDR_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_MASK | - GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, - GSWIP_PCE_TBL_CTRL_OPMOD_ADWR); - regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, 0); - - for (i = 0; i < priv->hw_info->pce_microcode_size; i++) { - regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, i); - regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(0), - (*priv->hw_info->pce_microcode)[i].val_0); - regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(1), - (*priv->hw_info->pce_microcode)[i].val_1); - regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(2), - (*priv->hw_info->pce_microcode)[i].val_2); - regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(3), - (*priv->hw_info->pce_microcode)[i].val_3); - - /* start the table access: */ - regmap_set_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_BAS); - err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, - GSWIP_PCE_TBL_CTRL_BAS); - if (err) - return err; - } - - /* tell the switch that the microcode is loaded */ - regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, - GSWIP_PCE_GCTRL_0_MC_VALID); - - return 0; -} - -static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) -{ - struct dsa_port *dp = dsa_to_port(priv->ds, port); - struct net_device *br = dsa_port_bridge_dev_get(dp); - u32 vinr; - int idx; - - if (!dsa_port_is_user(dp)) - return; - - if (br) { - u16 pvid = GSWIP_VLAN_UNAWARE_PVID; - - if (br_vlan_enabled(br)) - br_vlan_get_pvid(br, &pvid); - - /* VLAN-aware bridge ports with no PVID will use Active VLAN - * index 0. The expectation is that this drops all untagged and - * VID-0 tagged ingress traffic. 
- */ - idx = 0; - for (int i = priv->hw_info->max_ports; - i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == br && - priv->vlans[i].vid == pvid) { - idx = i; - break; - } - } - } else { - /* The Active VLAN table index as configured by - * gswip_add_single_port_br() - */ - idx = port + 1; - } - - vinr = idx ? GSWIP_PCE_VCTRL_VINR_ALL : GSWIP_PCE_VCTRL_VINR_TAGGED; - regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), - GSWIP_PCE_VCTRL_VINR, - FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr)); - - /* Note that in GSWIP 2.2 VLAN mode the VID needs to be programmed - * directly instead of referencing the index in the Active VLAN Tablet. - * However, without the VLANMD bit (9) in PCE_GCTRL_1 (0x457) even - * GSWIP 2.2 and newer hardware maintain the GSWIP 2.1 behavior. - */ - regmap_write(priv->gswip, GSWIP_PCE_DEFPVID(port), idx); -} - -static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct netlink_ext_ack *extack) -{ - struct gswip_priv *priv = ds->priv; - - if (vlan_filtering) { - /* Use tag based VLAN */ - regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), - GSWIP_PCE_VCTRL_VSR | - GSWIP_PCE_VCTRL_UVR | - GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR | - GSWIP_PCE_VCTRL_VID0, - GSWIP_PCE_VCTRL_UVR | - GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR | - GSWIP_PCE_VCTRL_VID0); - regmap_clear_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), - GSWIP_PCE_PCTRL_0_TVM); - } else { - /* Use port based VLAN */ - regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), - GSWIP_PCE_VCTRL_UVR | - GSWIP_PCE_VCTRL_VIMR | - GSWIP_PCE_VCTRL_VEMR | - GSWIP_PCE_VCTRL_VID0 | - GSWIP_PCE_VCTRL_VSR, - GSWIP_PCE_VCTRL_VSR); - regmap_set_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), - GSWIP_PCE_PCTRL_0_TVM); - } - - gswip_port_commit_pvid(priv, port); - - return 0; -} - -static int gswip_setup(struct dsa_switch *ds) -{ - unsigned int cpu_ports = dsa_cpu_ports(ds); - struct gswip_priv *priv = ds->priv; - struct dsa_port *cpu_dp; - int err, i; 
- - regmap_write(priv->gswip, GSWIP_SWRES, GSWIP_SWRES_R0); - usleep_range(5000, 10000); - regmap_write(priv->gswip, GSWIP_SWRES, 0); - - /* disable port fetch/store dma on all ports */ - for (i = 0; i < priv->hw_info->max_ports; i++) { - gswip_port_disable(ds, i); - gswip_port_vlan_filtering(ds, i, false, NULL); - } - - /* enable Switch */ - regmap_set_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); - - err = gswip_pce_load_microcode(priv); - if (err) { - dev_err(priv->dev, "writing PCE microcode failed, %i\n", err); - return err; - } - - /* Default unknown Broadcast/Multicast/Unicast port maps */ - regmap_write(priv->gswip, GSWIP_PCE_PMAP1, cpu_ports); - regmap_write(priv->gswip, GSWIP_PCE_PMAP2, cpu_ports); - regmap_write(priv->gswip, GSWIP_PCE_PMAP3, cpu_ports); - - /* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an - * interoperability problem with this auto polling mechanism because - * their status registers think that the link is in a different state - * than it actually is. For the AR8030 it has the BMSR_ESTATEN bit set - * as well as ESTATUS_1000_TFULL and ESTATUS_1000_XFULL. This makes the - * auto polling state machine consider the link being negotiated with - * 1Gbit/s. Since the PHY itself is a Fast Ethernet RMII PHY this leads - * to the switch port being completely dead (RX and TX are both not - * working). - * Also with various other PHY / port combinations (PHY11G GPHY, PHY22F - * GPHY, external RGMII PEF7071/7072) any traffic would stop. Sometimes - * it would work fine for a few minutes to hours and then stop, on - * other device it would no traffic could be sent or received at all. - * Testing shows that when PHY auto polling is disabled these problems - * go away. 
- */ - regmap_write(priv->mdio, GSWIP_MDIO_MDC_CFG0, 0x0); - - /* Configure the MDIO Clock 2.5 MHz */ - regmap_write_bits(priv->mdio, GSWIP_MDIO_MDC_CFG1, 0xff, 0x09); - - /* bring up the mdio bus */ - err = gswip_mdio(priv); - if (err) { - dev_err(priv->dev, "mdio bus setup failed\n"); - return err; - } - - /* Disable the xMII interface and clear it's isolation bit */ - for (i = 0; i < priv->hw_info->max_ports; i++) - gswip_mii_mask_cfg(priv, - GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE, - 0, i); - - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - /* enable special tag insertion on cpu port */ - regmap_set_bits(priv->gswip, GSWIP_FDMA_PCTRLp(cpu_dp->index), - GSWIP_FDMA_PCTRL_STEN); - - /* accept special tag in ingress direction */ - regmap_set_bits(priv->gswip, - GSWIP_PCE_PCTRL_0p(cpu_dp->index), - GSWIP_PCE_PCTRL_0_INGRESS); - } - - regmap_set_bits(priv->gswip, GSWIP_BM_QUEUE_GCTRL, - GSWIP_BM_QUEUE_GCTRL_GL_MOD); - - /* VLAN aware Switching */ - regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, - GSWIP_PCE_GCTRL_0_VLAN); - - /* Flush MAC Table */ - regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, - GSWIP_PCE_GCTRL_0_MTFL); - - err = gswip_switch_r_timeout(priv, GSWIP_PCE_GCTRL_0, - GSWIP_PCE_GCTRL_0_MTFL); - if (err) { - dev_err(priv->dev, "MAC flushing didn't finish\n"); - return err; - } - - ds->mtu_enforcement_ingress = true; - - return 0; -} - -static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds, - int port, - enum dsa_tag_protocol mp) -{ - struct gswip_priv *priv = ds->priv; - - return priv->hw_info->tag_protocol; -} - -static int gswip_vlan_active_create(struct gswip_priv *priv, - struct net_device *bridge, - int fid, u16 vid) -{ - struct gswip_pce_table_entry vlan_active = {0,}; - unsigned int max_ports = priv->hw_info->max_ports; - int idx = -1; - int err; - int i; - - /* Look for a free slot */ - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (!priv->vlans[i].bridge) { - idx = i; - break; - } - } - - if (idx == -1) - return 
-ENOSPC; - - if (fid == -1) - fid = idx; - - vlan_active.index = idx; - vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; - vlan_active.key[0] = vid; - vlan_active.val[0] = fid; - vlan_active.valid = true; - - err = gswip_pce_table_entry_write(priv, &vlan_active); - if (err) { - dev_err(priv->dev, "failed to write active VLAN: %d\n", err); - return err; - } - - priv->vlans[idx].bridge = bridge; - priv->vlans[idx].vid = vid; - priv->vlans[idx].fid = fid; - - return idx; -} - -static int gswip_vlan_active_remove(struct gswip_priv *priv, int idx) -{ - struct gswip_pce_table_entry vlan_active = {0,}; - int err; - - vlan_active.index = idx; - vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; - vlan_active.valid = false; - err = gswip_pce_table_entry_write(priv, &vlan_active); - if (err) - dev_err(priv->dev, "failed to delete active VLAN: %d\n", err); - priv->vlans[idx].bridge = NULL; - - return err; -} - -static int gswip_vlan_add(struct gswip_priv *priv, struct net_device *bridge, - int port, u16 vid, bool untagged, bool pvid, - bool vlan_aware) -{ - struct gswip_pce_table_entry vlan_mapping = {0,}; - unsigned int max_ports = priv->hw_info->max_ports; - unsigned int cpu_ports = dsa_cpu_ports(priv->ds); - bool active_vlan_created = false; - int fid = -1, idx = -1; - int i, err; - - /* Check if there is already a page for this bridge */ - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == bridge) { - if (vlan_aware) { - if (fid != -1 && fid != priv->vlans[i].fid) - dev_err(priv->dev, "one bridge with multiple flow ids\n"); - fid = priv->vlans[i].fid; - } - if (priv->vlans[i].vid == vid) { - idx = i; - break; - } - } - } - - /* If this bridge is not programmed yet, add a Active VLAN table - * entry in a free slot and prepare the VLAN mapping table entry. 
- */ - if (idx == -1) { - idx = gswip_vlan_active_create(priv, bridge, fid, vid); - if (idx < 0) - return idx; - active_vlan_created = true; - - vlan_mapping.index = idx; - vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - } else { - /* Read the existing VLAN mapping entry from the switch */ - vlan_mapping.index = idx; - vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - err = gswip_pce_table_entry_read(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to read VLAN mapping: %d\n", - err); - return err; - } - } - - /* VLAN ID byte, maps to the VLAN ID of vlan active table */ - vlan_mapping.val[0] = vid; - /* Update the VLAN mapping entry and write it to the switch */ - vlan_mapping.val[1] |= cpu_ports; - vlan_mapping.val[1] |= BIT(port); - if (vlan_aware) - vlan_mapping.val[2] |= cpu_ports; - if (untagged) - vlan_mapping.val[2] &= ~BIT(port); - else - vlan_mapping.val[2] |= BIT(port); - err = gswip_pce_table_entry_write(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); - /* In case an Active VLAN was creaetd delete it again */ - if (active_vlan_created) - gswip_vlan_active_remove(priv, idx); - return err; - } - - gswip_port_commit_pvid(priv, port); - - return 0; -} - -static int gswip_vlan_remove(struct gswip_priv *priv, - struct net_device *bridge, int port, - u16 vid) -{ - struct gswip_pce_table_entry vlan_mapping = {0,}; - unsigned int max_ports = priv->hw_info->max_ports; - int idx = -1; - int i; - int err; - - /* Check if there is already a page for this bridge */ - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == bridge && - priv->vlans[i].vid == vid) { - idx = i; - break; - } - } - - if (idx == -1) { - dev_err(priv->dev, "Port %d cannot find VID %u of bridge %s\n", - port, vid, bridge ? 
bridge->name : "(null)"); - return -ENOENT; - } - - vlan_mapping.index = idx; - vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; - err = gswip_pce_table_entry_read(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to read VLAN mapping: %d\n", err); - return err; - } - - vlan_mapping.val[1] &= ~BIT(port); - vlan_mapping.val[2] &= ~BIT(port); - err = gswip_pce_table_entry_write(priv, &vlan_mapping); - if (err) { - dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); - return err; - } - - /* In case all ports are removed from the bridge, remove the VLAN */ - if (!(vlan_mapping.val[1] & ~dsa_cpu_ports(priv->ds))) { - err = gswip_vlan_active_remove(priv, idx); - if (err) { - dev_err(priv->dev, "failed to write active VLAN: %d\n", - err); - return err; - } - } - - gswip_port_commit_pvid(priv, port); - - return 0; -} - -static int gswip_port_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, - bool *tx_fwd_offload, - struct netlink_ext_ack *extack) -{ - struct net_device *br = bridge.dev; - struct gswip_priv *priv = ds->priv; - int err; - - /* Set up the VLAN for VLAN-unaware bridging for this port, and remove - * it from the "single-port bridge" through which it was operating as - * standalone. - */ - err = gswip_vlan_add(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, - true, true, false); - if (err) - return err; - - return gswip_add_single_port_br(priv, port, false); -} - -static void gswip_port_bridge_leave(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) -{ - struct net_device *br = bridge.dev; - struct gswip_priv *priv = ds->priv; - - /* Add the port back to the "single-port bridge", and remove it from - * the VLAN-unaware PVID created for this bridge. 
- */ - gswip_add_single_port_br(priv, port, true); - gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID); -} - -static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) -{ - struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); - struct gswip_priv *priv = ds->priv; - unsigned int max_ports = priv->hw_info->max_ports; - int pos = max_ports; - int i, idx = -1; - - /* We only support VLAN filtering on bridges */ - if (!dsa_is_cpu_port(ds, port) && !bridge) - return -EOPNOTSUPP; - - /* Check if there is already a page for this VLAN */ - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == bridge && - priv->vlans[i].vid == vlan->vid) { - idx = i; - break; - } - } - - /* If this VLAN is not programmed yet, we have to reserve - * one entry in the VLAN table. Make sure we start at the - * next position round. - */ - if (idx == -1) { - /* Look for a free slot */ - for (; pos < ARRAY_SIZE(priv->vlans); pos++) { - if (!priv->vlans[pos].bridge) { - idx = pos; - pos++; - break; - } - } - - if (idx == -1) { - NL_SET_ERR_MSG_MOD(extack, "No slot in VLAN table"); - return -ENOSPC; - } - } - - return 0; -} - -static int gswip_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) -{ - struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); - struct gswip_priv *priv = ds->priv; - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - int err; - - if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) - return 0; - - err = gswip_port_vlan_prepare(ds, port, vlan, extack); - if (err) - return err; - - /* We have to receive all packets on the CPU port and should not - * do any VLAN filtering here. This is also called with bridge - * NULL and then we do not know for which bridge to configure - * this. 
- */ - if (dsa_is_cpu_port(ds, port)) - return 0; - - return gswip_vlan_add(priv, bridge, port, vlan->vid, untagged, pvid, - true); -} - -static int gswip_port_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); - struct gswip_priv *priv = ds->priv; - - if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) - return 0; - - /* We have to receive all packets on the CPU port and should not - * do any VLAN filtering here. This is also called with bridge - * NULL and then we do not know for which bridge to configure - * this. - */ - if (dsa_is_cpu_port(ds, port)) - return 0; - - return gswip_vlan_remove(priv, bridge, port, vlan->vid); -} - -static void gswip_port_fast_age(struct dsa_switch *ds, int port) -{ - struct gswip_priv *priv = ds->priv; - struct gswip_pce_table_entry mac_bridge = {0,}; - int i; - int err; - - for (i = 0; i < 2048; i++) { - mac_bridge.table = GSWIP_TABLE_MAC_BRIDGE; - mac_bridge.index = i; - - err = gswip_pce_table_entry_read(priv, &mac_bridge); - if (err) { - dev_err(priv->dev, "failed to read mac bridge: %d\n", - err); - return; - } - - if (!mac_bridge.valid) - continue; - - if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC) - continue; - - if (port != FIELD_GET(GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT, - mac_bridge.val[0])) - continue; - - mac_bridge.valid = false; - err = gswip_pce_table_entry_write(priv, &mac_bridge); - if (err) { - dev_err(priv->dev, "failed to write mac bridge: %d\n", - err); - return; - } - } -} - -static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) -{ - struct gswip_priv *priv = ds->priv; - u32 stp_state; - - switch (state) { - case BR_STATE_DISABLED: - regmap_clear_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), - GSWIP_SDMA_PCTRL_EN); - return; - case BR_STATE_BLOCKING: - case BR_STATE_LISTENING: - stp_state = GSWIP_PCE_PCTRL_0_PSTATE_LISTEN; - break; - case BR_STATE_LEARNING: - stp_state = 
GSWIP_PCE_PCTRL_0_PSTATE_LEARNING; - break; - case BR_STATE_FORWARDING: - stp_state = GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING; - break; - default: - dev_err(priv->dev, "invalid STP state: %d\n", state); - return; - } - - regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), - GSWIP_SDMA_PCTRL_EN); - regmap_write_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), - GSWIP_PCE_PCTRL_0_PSTATE_MASK, - stp_state); -} - -static int gswip_port_fdb(struct dsa_switch *ds, int port, - struct net_device *bridge, const unsigned char *addr, - u16 vid, bool add) -{ - struct gswip_priv *priv = ds->priv; - struct gswip_pce_table_entry mac_bridge = {0,}; - unsigned int max_ports = priv->hw_info->max_ports; - int fid = -1; - int i; - int err; - - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == bridge) { - fid = priv->vlans[i].fid; - break; - } - } - - if (fid == -1) { - dev_err(priv->dev, "no FID found for bridge %s\n", - bridge->name); - return -EINVAL; - } - - mac_bridge.table = GSWIP_TABLE_MAC_BRIDGE; - mac_bridge.key_mode = true; - mac_bridge.key[0] = addr[5] | (addr[4] << 8); - mac_bridge.key[1] = addr[3] | (addr[2] << 8); - mac_bridge.key[2] = addr[1] | (addr[0] << 8); - mac_bridge.key[3] = FIELD_PREP(GSWIP_TABLE_MAC_BRIDGE_KEY3_FID, fid); - mac_bridge.val[0] = add ? 
BIT(port) : 0; /* port map */ - mac_bridge.val[1] = GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC; - mac_bridge.valid = add; - - err = gswip_pce_table_entry_write(priv, &mac_bridge); - if (err) - dev_err(priv->dev, "failed to write mac bridge: %d\n", err); - - return err; -} - -static int gswip_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db) -{ - if (db.type != DSA_DB_BRIDGE) - return -EOPNOTSUPP; - - return gswip_port_fdb(ds, port, db.bridge.dev, addr, vid, true); -} - -static int gswip_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db) -{ - if (db.type != DSA_DB_BRIDGE) - return -EOPNOTSUPP; - - return gswip_port_fdb(ds, port, db.bridge.dev, addr, vid, false); -} - -static int gswip_port_fdb_dump(struct dsa_switch *ds, int port, - dsa_fdb_dump_cb_t *cb, void *data) -{ - struct gswip_priv *priv = ds->priv; - struct gswip_pce_table_entry mac_bridge = {0,}; - unsigned char addr[ETH_ALEN]; - int i; - int err; - - for (i = 0; i < 2048; i++) { - mac_bridge.table = GSWIP_TABLE_MAC_BRIDGE; - mac_bridge.index = i; - - err = gswip_pce_table_entry_read(priv, &mac_bridge); - if (err) { - dev_err(priv->dev, - "failed to read mac bridge entry %d: %d\n", - i, err); - return err; - } - - if (!mac_bridge.valid) - continue; - - addr[5] = mac_bridge.key[0] & 0xff; - addr[4] = (mac_bridge.key[0] >> 8) & 0xff; - addr[3] = mac_bridge.key[1] & 0xff; - addr[2] = (mac_bridge.key[1] >> 8) & 0xff; - addr[1] = mac_bridge.key[2] & 0xff; - addr[0] = (mac_bridge.key[2] >> 8) & 0xff; - if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC) { - if (mac_bridge.val[0] & BIT(port)) { - err = cb(addr, 0, true, data); - if (err) - return err; - } - } else { - if (port == FIELD_GET(GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT, - mac_bridge.val[0])) { - err = cb(addr, 0, false, data); - if (err) - return err; - } - } - } - return 0; -} - -static int gswip_port_max_mtu(struct dsa_switch *ds, int port) -{ - /* 
Includes 8 bytes for special header. */ - return GSWIP_MAX_PACKET_LENGTH - VLAN_ETH_HLEN - ETH_FCS_LEN; -} + * Copyright (C) 2025 Daniel Golle + * Copyright (C) 2017 - 2019 Hauke Mehrtens + * Copyright (C) 2012 John Crispin + * Copyright (C) 2010 Lantiq Deutschland + */ -static int gswip_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) -{ - struct gswip_priv *priv = ds->priv; +#include "lantiq_gswip.h" +#include "lantiq_pce.h" - /* CPU port always has maximum mtu of user ports, so use it to set - * switch frame size, including 8 byte special header. - */ - if (dsa_is_cpu_port(ds, port)) { - new_mtu += 8; - regmap_write(priv->gswip, GSWIP_MAC_FLEN, - VLAN_ETH_HLEN + new_mtu + ETH_FCS_LEN); - } +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include - /* Enable MLEN for ports with non-standard MTUs, including the special - * header on the CPU port added above. - */ - if (new_mtu != ETH_DATA_LEN) - regmap_set_bits(priv->gswip, GSWIP_MAC_CTRL_2p(port), - GSWIP_MAC_CTRL_2_MLEN); - else - regmap_clear_bits(priv->gswip, GSWIP_MAC_CTRL_2p(port), - GSWIP_MAC_CTRL_2_MLEN); +#include - return 0; -} +struct xway_gphy_match_data { + char *fe_firmware_name; + char *ge_firmware_name; +}; static void gswip_xrx200_phylink_get_caps(struct dsa_switch *ds, int port, struct phylink_config *config) @@ -1291,327 +97,6 @@ static void gswip_xrx300_phylink_get_caps(struct dsa_switch *ds, int port, MAC_10 | MAC_100 | MAC_1000; } -static void gswip_phylink_get_caps(struct dsa_switch *ds, int port, - struct phylink_config *config) -{ - struct gswip_priv *priv = ds->priv; - - priv->hw_info->phylink_get_caps(ds, port, config); -} - -static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link) -{ - u32 mdio_phy; - - if (link) - mdio_phy = GSWIP_MDIO_PHY_LINK_UP; - else - mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN; - - regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_LINK_MASK, mdio_phy); -} - -static void 
gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, - phy_interface_t interface) -{ - u32 mdio_phy = 0, mii_cfg = 0, mac_ctrl_0 = 0; - - switch (speed) { - case SPEED_10: - mdio_phy = GSWIP_MDIO_PHY_SPEED_M10; - - if (interface == PHY_INTERFACE_MODE_RMII) - mii_cfg = GSWIP_MII_CFG_RATE_M50; - else - mii_cfg = GSWIP_MII_CFG_RATE_M2P5; - - mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; - break; - - case SPEED_100: - mdio_phy = GSWIP_MDIO_PHY_SPEED_M100; - - if (interface == PHY_INTERFACE_MODE_RMII) - mii_cfg = GSWIP_MII_CFG_RATE_M50; - else - mii_cfg = GSWIP_MII_CFG_RATE_M25; - - mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; - break; - - case SPEED_1000: - mdio_phy = GSWIP_MDIO_PHY_SPEED_G1; - - mii_cfg = GSWIP_MII_CFG_RATE_M125; - - mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_RGMII; - break; - } - - regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port); - regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), - GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0); -} - -static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) -{ - u32 mac_ctrl_0, mdio_phy; - - if (duplex == DUPLEX_FULL) { - mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_EN; - mdio_phy = GSWIP_MDIO_PHY_FDUP_EN; - } else { - mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_DIS; - mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS; - } - - regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), - GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0); - regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy); -} - -static void gswip_port_set_pause(struct gswip_priv *priv, int port, - bool tx_pause, bool rx_pause) -{ - u32 mac_ctrl_0, mdio_phy; - - if (tx_pause && rx_pause) { - mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RXTX; - mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | - GSWIP_MDIO_PHY_FCONRX_EN; - } else if (tx_pause) { - mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_TX; - mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | - 
GSWIP_MDIO_PHY_FCONRX_DIS; - } else if (rx_pause) { - mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RX; - mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | - GSWIP_MDIO_PHY_FCONRX_EN; - } else { - mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_NONE; - mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | - GSWIP_MDIO_PHY_FCONRX_DIS; - } - - regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), - GSWIP_MAC_CTRL_0_FCON_MASK, mac_ctrl_0); - regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), - GSWIP_MDIO_PHY_FCONTX_MASK | GSWIP_MDIO_PHY_FCONRX_MASK, - mdio_phy); -} - -static void gswip_phylink_mac_config(struct phylink_config *config, - unsigned int mode, - const struct phylink_link_state *state) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct gswip_priv *priv = dp->ds->priv; - int port = dp->index; - u32 miicfg = 0; - - miicfg |= GSWIP_MII_CFG_LDCLKDIS; - - switch (state->interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - return; - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_INTERNAL: - miicfg |= GSWIP_MII_CFG_MODE_MIIM; - break; - case PHY_INTERFACE_MODE_REVMII: - miicfg |= GSWIP_MII_CFG_MODE_MIIP; - break; - case PHY_INTERFACE_MODE_RMII: - miicfg |= GSWIP_MII_CFG_MODE_RMIIM; - break; - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - miicfg |= GSWIP_MII_CFG_MODE_RGMII; - break; - case PHY_INTERFACE_MODE_GMII: - miicfg |= GSWIP_MII_CFG_MODE_GMII; - break; - default: - dev_err(dp->ds->dev, - "Unsupported interface: %d\n", state->interface); - return; - } - - gswip_mii_mask_cfg(priv, - GSWIP_MII_CFG_MODE_MASK | GSWIP_MII_CFG_RMII_CLK | - GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS, - miicfg, port); - - switch (state->interface) { - case PHY_INTERFACE_MODE_RGMII_ID: - gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK | - GSWIP_MII_PCDU_RXDLY_MASK, 0, port); - break; - case PHY_INTERFACE_MODE_RGMII_RXID: - 
gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_RXDLY_MASK, 0, port); - break; - case PHY_INTERFACE_MODE_RGMII_TXID: - gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK, 0, port); - break; - default: - break; - } -} - -static void gswip_phylink_mac_link_down(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct gswip_priv *priv = dp->ds->priv; - int port = dp->index; - - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, port); - - if (!dsa_port_is_cpu(dp)) - gswip_port_set_link(priv, port, false); -} - -static void gswip_phylink_mac_link_up(struct phylink_config *config, - struct phy_device *phydev, - unsigned int mode, - phy_interface_t interface, - int speed, int duplex, - bool tx_pause, bool rx_pause) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct gswip_priv *priv = dp->ds->priv; - int port = dp->index; - - if (!dsa_port_is_cpu(dp)) { - gswip_port_set_link(priv, port, true); - gswip_port_set_speed(priv, port, speed, interface); - gswip_port_set_duplex(priv, port, duplex); - gswip_port_set_pause(priv, port, tx_pause, rx_pause); - } - - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, GSWIP_MII_CFG_EN, port); -} - -static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, - uint8_t *data) -{ - int i; - - if (stringset != ETH_SS_STATS) - return; - - for (i = 0; i < ARRAY_SIZE(gswip_rmon_cnt); i++) - ethtool_puts(&data, gswip_rmon_cnt[i].name); -} - -static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, - u32 index) -{ - u32 result, val; - int err; - - regmap_write(priv->gswip, GSWIP_BM_RAM_ADDR, index); - regmap_write_bits(priv->gswip, GSWIP_BM_RAM_CTRL, - GSWIP_BM_RAM_CTRL_ADDR_MASK | GSWIP_BM_RAM_CTRL_OPMOD | - GSWIP_BM_RAM_CTRL_BAS, - table | GSWIP_BM_RAM_CTRL_BAS); - - err = gswip_switch_r_timeout(priv, GSWIP_BM_RAM_CTRL, - GSWIP_BM_RAM_CTRL_BAS); - if (err) { - dev_err(priv->dev, "timeout while reading table: %u, index: %u\n", - 
table, index); - return 0; - } - - regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(0), &result); - regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(1), &val); - result |= val << 16; - - return result; -} - -static void gswip_get_ethtool_stats(struct dsa_switch *ds, int port, - uint64_t *data) -{ - struct gswip_priv *priv = ds->priv; - const struct gswip_rmon_cnt_desc *rmon_cnt; - int i; - u64 high; - - for (i = 0; i < ARRAY_SIZE(gswip_rmon_cnt); i++) { - rmon_cnt = &gswip_rmon_cnt[i]; - - data[i] = gswip_bcm_ram_entry_read(priv, port, - rmon_cnt->offset); - if (rmon_cnt->size == 2) { - high = gswip_bcm_ram_entry_read(priv, port, - rmon_cnt->offset + 1); - data[i] |= high << 32; - } - } -} - -static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset) -{ - if (sset != ETH_SS_STATS) - return 0; - - return ARRAY_SIZE(gswip_rmon_cnt); -} - -static struct phylink_pcs *gswip_phylink_mac_select_pcs(struct phylink_config *config, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct gswip_priv *priv = dp->ds->priv; - - if (priv->hw_info->mac_select_pcs) - return priv->hw_info->mac_select_pcs(config, interface); - - return NULL; -} - -static const struct phylink_mac_ops gswip_phylink_mac_ops = { - .mac_config = gswip_phylink_mac_config, - .mac_link_down = gswip_phylink_mac_link_down, - .mac_link_up = gswip_phylink_mac_link_up, - .mac_select_pcs = gswip_phylink_mac_select_pcs, -}; - -static const struct dsa_switch_ops gswip_switch_ops = { - .get_tag_protocol = gswip_get_tag_protocol, - .setup = gswip_setup, - .port_setup = gswip_port_setup, - .port_enable = gswip_port_enable, - .port_disable = gswip_port_disable, - .port_bridge_join = gswip_port_bridge_join, - .port_bridge_leave = gswip_port_bridge_leave, - .port_fast_age = gswip_port_fast_age, - .port_vlan_filtering = gswip_port_vlan_filtering, - .port_vlan_add = gswip_port_vlan_add, - .port_vlan_del = gswip_port_vlan_del, - .port_stp_state_set = gswip_port_stp_state_set, - 
.port_fdb_add = gswip_port_fdb_add, - .port_fdb_del = gswip_port_fdb_del, - .port_fdb_dump = gswip_port_fdb_dump, - .port_change_mtu = gswip_port_change_mtu, - .port_max_mtu = gswip_port_max_mtu, - .phylink_get_caps = gswip_phylink_get_caps, - .get_strings = gswip_get_strings, - .get_ethtool_stats = gswip_get_ethtool_stats, - .get_sset_count = gswip_get_sset_count, -}; - static const struct xway_gphy_match_data xrx200a1x_gphy_data = { .fe_firmware_name = "lantiq/xrx200_phy22f_a14.bin", .ge_firmware_name = "lantiq/xrx200_phy11g_a14.bin", @@ -1832,30 +317,6 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv, return err; } -static int gswip_validate_cpu_port(struct dsa_switch *ds) -{ - struct gswip_priv *priv = ds->priv; - struct dsa_port *cpu_dp; - int cpu_port = -1; - - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - if (cpu_port != -1) - return dev_err_probe(ds->dev, -EINVAL, - "only a single CPU port is supported\n"); - - cpu_port = cpu_dp->index; - } - - if (cpu_port == -1) - return dev_err_probe(ds->dev, -EINVAL, "no CPU port defined\n"); - - if (BIT(cpu_port) & ~priv->hw_info->allowed_cpu_ports) - return dev_err_probe(ds->dev, -EINVAL, - "unsupported CPU port defined\n"); - - return 0; -} - static const struct regmap_config sw_regmap_config = { .name = "switch", .reg_bits = 32, @@ -1929,24 +390,9 @@ static int gswip_probe(struct platform_device *pdev) if (!priv->ds) return -ENOMEM; - priv->ds->dev = dev; - priv->ds->num_ports = priv->hw_info->max_ports; - priv->ds->priv = priv; - priv->ds->ops = &gswip_switch_ops; - priv->ds->phylink_mac_ops = &gswip_phylink_mac_ops; priv->dev = dev; - mutex_init(&priv->pce_table_lock); - regmap_read(priv->gswip, GSWIP_VERSION, &version); - /* The hardware has the 'major/minor' version bytes in the wrong order - * preventing numerical comparisons. Construct a 16-bit unsigned integer - * having the REV field as most significant byte and the MOD field as - * least significant byte. 
This is effectively swapping the two bytes of - * the version variable, but other than using swab16 it doesn't affect - * the source variable. - */ - priv->version = GSWIP_VERSION_REV(version) << 8 | - GSWIP_VERSION_MOD(version); + regmap_read(priv->gswip, GSWIP_VERSION, &version); np = dev->of_node; switch (version) { @@ -1976,25 +422,14 @@ static int gswip_probe(struct platform_device *pdev) "gphy fw probe failed\n"); } - err = dsa_register_switch(priv->ds); - if (err) { - dev_err_probe(dev, err, "dsa switch registration failed\n"); - goto gphy_fw_remove; - } - - err = gswip_validate_cpu_port(priv->ds); + err = gswip_probe_common(priv, version); if (err) - goto disable_switch; + goto gphy_fw_remove; platform_set_drvdata(pdev, priv); - dev_info(dev, "probed GSWIP version %lx mod %lx\n", - GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version)); return 0; -disable_switch: - regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); - dsa_unregister_switch(priv->ds); gphy_fw_remove: for (i = 0; i < priv->num_gphy_fw; i++) gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); @@ -2010,7 +445,7 @@ static void gswip_remove(struct platform_device *pdev) return; /* disable the switch */ - regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); + gswip_disable_switch(priv); dsa_unregister_switch(priv->ds); diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 24d759e06e153..d86290db19b41 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -278,4 +278,8 @@ struct gswip_priv { u16 version; }; +void gswip_disable_switch(struct gswip_priv *priv); + +int gswip_probe_common(struct gswip_priv *priv, u32 version); + #endif /* __LANTIQ_GSWIP_H */ diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c new file mode 100644 index 0000000000000..a0e361622acb0 --- /dev/null +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ 
-0,0 +1,1622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Lantiq / Intel / MaxLinear GSWIP common function library + * + * Copyright (C) 2025 Daniel Golle + * Copyright (C) 2023 - 2024 MaxLinear Inc. + * Copyright (C) 2022 Snap One, LLC. All rights reserved. + * Copyright (C) 2017 - 2019 Hauke Mehrtens + * Copyright (C) 2012 John Crispin + * Copyright (C) 2010 Lantiq Deutschland + * + * The VLAN and bridge model the GSWIP hardware uses does not directly + * match the model DSA uses. + * + * The hardware has 64 possible table entries for bridges with one VLAN + * ID, one flow ID and a list of ports for each bridge. All entries which + * match the same flow ID are combined in the mac learning table, they + * act as one global bridge. + * The hardware does not support VLAN filter on the port, but on the + * bridge, this driver converts the DSA model to the hardware. + * + * The CPU gets all the exception frames which do not match any forwarding + * rule and the CPU port is also added to all bridges. This makes it possible + * to handle all the special cases easily in software. + * At the initialization the driver allocates one bridge table entry for + * each switch port which is used when the port is used without an + * explicit bridge. This prevents the frames from being forwarded + * between all LAN ports by default.
+ */ + +#include "lantiq_gswip.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct gswip_pce_table_entry { + u16 index; // PCE_TBL_ADDR.ADDR = pData->table_index + u16 table; // PCE_TBL_CTRL.ADDR = pData->table + u16 key[8]; + u16 val[5]; + u16 mask; + u8 gmap; + bool type; + bool valid; + bool key_mode; +}; + +struct gswip_rmon_cnt_desc { + unsigned int size; + unsigned int offset; + const char *name; +}; + +#define MIB_DESC(_size, _offset, _name) {.size = _size, .offset = _offset, .name = _name} + +static const struct gswip_rmon_cnt_desc gswip_rmon_cnt[] = { + /** Receive Packet Count (only packets that are accepted and not discarded). */ + MIB_DESC(1, 0x1F, "RxGoodPkts"), + MIB_DESC(1, 0x23, "RxUnicastPkts"), + MIB_DESC(1, 0x22, "RxMulticastPkts"), + MIB_DESC(1, 0x21, "RxFCSErrorPkts"), + MIB_DESC(1, 0x1D, "RxUnderSizeGoodPkts"), + MIB_DESC(1, 0x1E, "RxUnderSizeErrorPkts"), + MIB_DESC(1, 0x1B, "RxOversizeGoodPkts"), + MIB_DESC(1, 0x1C, "RxOversizeErrorPkts"), + MIB_DESC(1, 0x20, "RxGoodPausePkts"), + MIB_DESC(1, 0x1A, "RxAlignErrorPkts"), + MIB_DESC(1, 0x12, "Rx64BytePkts"), + MIB_DESC(1, 0x13, "Rx127BytePkts"), + MIB_DESC(1, 0x14, "Rx255BytePkts"), + MIB_DESC(1, 0x15, "Rx511BytePkts"), + MIB_DESC(1, 0x16, "Rx1023BytePkts"), + /** Receive Size 1024-1522 (or more, if configured) Packet Count. 
*/ + MIB_DESC(1, 0x17, "RxMaxBytePkts"), + MIB_DESC(1, 0x18, "RxDroppedPkts"), + MIB_DESC(1, 0x19, "RxFilteredPkts"), + MIB_DESC(2, 0x24, "RxGoodBytes"), + MIB_DESC(2, 0x26, "RxBadBytes"), + MIB_DESC(1, 0x11, "TxAcmDroppedPkts"), + MIB_DESC(1, 0x0C, "TxGoodPkts"), + MIB_DESC(1, 0x06, "TxUnicastPkts"), + MIB_DESC(1, 0x07, "TxMulticastPkts"), + MIB_DESC(1, 0x00, "Tx64BytePkts"), + MIB_DESC(1, 0x01, "Tx127BytePkts"), + MIB_DESC(1, 0x02, "Tx255BytePkts"), + MIB_DESC(1, 0x03, "Tx511BytePkts"), + MIB_DESC(1, 0x04, "Tx1023BytePkts"), + /** Transmit Size 1024-1522 (or more, if configured) Packet Count. */ + MIB_DESC(1, 0x05, "TxMaxBytePkts"), + MIB_DESC(1, 0x08, "TxSingleCollCount"), + MIB_DESC(1, 0x09, "TxMultCollCount"), + MIB_DESC(1, 0x0A, "TxLateCollCount"), + MIB_DESC(1, 0x0B, "TxExcessCollCount"), + MIB_DESC(1, 0x0D, "TxPauseCount"), + MIB_DESC(1, 0x10, "TxDroppedPkts"), + MIB_DESC(2, 0x0E, "TxGoodBytes"), +}; + +static u32 gswip_switch_r_timeout(struct gswip_priv *priv, u32 offset, + u32 cleared) +{ + u32 val; + + return regmap_read_poll_timeout(priv->gswip, offset, val, + !(val & cleared), 20, 50000); +} + +static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 mask, u32 set, + int port) +{ + int reg_port; + + /* MII_CFG register only exists for MII ports */ + if (!(priv->hw_info->mii_ports & BIT(port))) + return; + + reg_port = port + priv->hw_info->mii_port_reg_offset; + + regmap_write_bits(priv->mii, GSWIP_MII_CFGp(reg_port), mask, + set); +} + +static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 mask, u32 set, + int port) +{ + int reg_port; + + /* MII_PCDU register only exists for MII ports */ + if (!(priv->hw_info->mii_ports & BIT(port))) + return; + + reg_port = port + priv->hw_info->mii_port_reg_offset; + + switch (reg_port) { + case 0: + regmap_write_bits(priv->mii, GSWIP_MII_PCDU0, mask, set); + break; + case 1: + regmap_write_bits(priv->mii, GSWIP_MII_PCDU1, mask, set); + break; + case 5: + regmap_write_bits(priv->mii, GSWIP_MII_PCDU5, mask, 
set); + break; + } +} + +static int gswip_mdio_poll(struct gswip_priv *priv) +{ + u32 ctrl; + + return regmap_read_poll_timeout(priv->mdio, GSWIP_MDIO_CTRL, ctrl, + !(ctrl & GSWIP_MDIO_CTRL_BUSY), 40, 4000); +} + +static int gswip_mdio_wr(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct gswip_priv *priv = bus->priv; + int err; + + err = gswip_mdio_poll(priv); + if (err) { + dev_err(&bus->dev, "waiting for MDIO bus busy timed out\n"); + return err; + } + + regmap_write(priv->mdio, GSWIP_MDIO_WRITE, val); + regmap_write(priv->mdio, GSWIP_MDIO_CTRL, + GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_WR | + ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | + (reg & GSWIP_MDIO_CTRL_REGAD_MASK)); + + return 0; +} + +static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) +{ + struct gswip_priv *priv = bus->priv; + u32 val; + int err; + + err = gswip_mdio_poll(priv); + if (err) { + dev_err(&bus->dev, "waiting for MDIO bus busy timed out\n"); + return err; + } + + regmap_write(priv->mdio, GSWIP_MDIO_CTRL, + GSWIP_MDIO_CTRL_BUSY | GSWIP_MDIO_CTRL_RD | + ((addr & GSWIP_MDIO_CTRL_PHYAD_MASK) << GSWIP_MDIO_CTRL_PHYAD_SHIFT) | + (reg & GSWIP_MDIO_CTRL_REGAD_MASK)); + + err = gswip_mdio_poll(priv); + if (err) { + dev_err(&bus->dev, "waiting for MDIO bus busy timed out\n"); + return err; + } + + err = regmap_read(priv->mdio, GSWIP_MDIO_READ, &val); + if (err) + return err; + + return val; +} + +static int gswip_mdio(struct gswip_priv *priv) +{ + struct device_node *mdio_np, *switch_np = priv->dev->of_node; + struct device *dev = priv->dev; + struct mii_bus *bus; + int err = 0; + + mdio_np = of_get_compatible_child(switch_np, "lantiq,xrx200-mdio"); + if (!mdio_np) + mdio_np = of_get_child_by_name(switch_np, "mdio"); + + if (!of_device_is_available(mdio_np)) + goto out_put_node; + + bus = devm_mdiobus_alloc(dev); + if (!bus) { + err = -ENOMEM; + goto out_put_node; + } + + bus->priv = priv; + bus->read = gswip_mdio_rd; + bus->write = gswip_mdio_wr; + 
bus->name = "lantiq,xrx200-mdio"; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(priv->dev)); + bus->parent = priv->dev; + + err = devm_of_mdiobus_register(dev, bus, mdio_np); + +out_put_node: + of_node_put(mdio_np); + + return err; +} + +static int gswip_pce_table_entry_read(struct gswip_priv *priv, + struct gswip_pce_table_entry *tbl) +{ + int i; + int err; + u32 crtl; + u32 tmp; + u16 addr_mode = tbl->key_mode ? GSWIP_PCE_TBL_CTRL_OPMOD_KSRD : + GSWIP_PCE_TBL_CTRL_OPMOD_ADRD; + + mutex_lock(&priv->pce_table_lock); + + err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); + if (err) + goto out_unlock; + + regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK | + GSWIP_PCE_TBL_CTRL_BAS, + tbl->table | addr_mode | GSWIP_PCE_TBL_CTRL_BAS); + + err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); + if (err) + goto out_unlock; + + for (i = 0; i < ARRAY_SIZE(tbl->key); i++) { + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_KEY(i), &tmp); + if (err) + goto out_unlock; + tbl->key[i] = tmp; + } + for (i = 0; i < ARRAY_SIZE(tbl->val); i++) { + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_VAL(i), &tmp); + if (err) + goto out_unlock; + tbl->val[i] = tmp; + } + + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_MASK, &tmp); + if (err) + goto out_unlock; + + tbl->mask = tmp; + err = regmap_read(priv->gswip, GSWIP_PCE_TBL_CTRL, &crtl); + if (err) + goto out_unlock; + + tbl->type = !!(crtl & GSWIP_PCE_TBL_CTRL_TYPE); + tbl->valid = !!(crtl & GSWIP_PCE_TBL_CTRL_VLD); + tbl->gmap = (crtl & GSWIP_PCE_TBL_CTRL_GMAP_MASK) >> 7; + +out_unlock: + mutex_unlock(&priv->pce_table_lock); + + return err; +} + +static int gswip_pce_table_entry_write(struct gswip_priv *priv, + struct gswip_pce_table_entry *tbl) +{ + int i; + int err; + u32 crtl; + u16 addr_mode = tbl->key_mode ? 
GSWIP_PCE_TBL_CTRL_OPMOD_KSWR : + GSWIP_PCE_TBL_CTRL_OPMOD_ADWR; + + mutex_lock(&priv->pce_table_lock); + + err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); + if (err) { + mutex_unlock(&priv->pce_table_lock); + return err; + } + + regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, tbl->index); + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK, + tbl->table | addr_mode); + + for (i = 0; i < ARRAY_SIZE(tbl->key); i++) + regmap_write(priv->gswip, GSWIP_PCE_TBL_KEY(i), tbl->key[i]); + + for (i = 0; i < ARRAY_SIZE(tbl->val); i++) + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(i), tbl->val[i]); + + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK, + tbl->table | addr_mode); + + regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, tbl->mask); + + regmap_read(priv->gswip, GSWIP_PCE_TBL_CTRL, &crtl); + crtl &= ~(GSWIP_PCE_TBL_CTRL_TYPE | GSWIP_PCE_TBL_CTRL_VLD | + GSWIP_PCE_TBL_CTRL_GMAP_MASK); + if (tbl->type) + crtl |= GSWIP_PCE_TBL_CTRL_TYPE; + if (tbl->valid) + crtl |= GSWIP_PCE_TBL_CTRL_VLD; + crtl |= (tbl->gmap << 7) & GSWIP_PCE_TBL_CTRL_GMAP_MASK; + crtl |= GSWIP_PCE_TBL_CTRL_BAS; + regmap_write(priv->gswip, GSWIP_PCE_TBL_CTRL, crtl); + + err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); + + mutex_unlock(&priv->pce_table_lock); + + return err; +} + +/* Add the LAN port into a bridge with the CPU port by + * default. This prevents automatic forwarding of + * packages between the LAN ports when no explicit + * bridge is configured. 
+ */ +static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) +{ + struct gswip_pce_table_entry vlan_active = {0,}; + struct gswip_pce_table_entry vlan_mapping = {0,}; + int err; + + vlan_active.index = port + 1; + vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; + vlan_active.key[0] = GSWIP_VLAN_UNAWARE_PVID; + vlan_active.val[0] = port + 1 /* fid */; + vlan_active.valid = add; + err = gswip_pce_table_entry_write(priv, &vlan_active); + if (err) { + dev_err(priv->dev, "failed to write active VLAN: %d\n", err); + return err; + } + + if (!add) + return 0; + + vlan_mapping.index = port + 1; + vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; + vlan_mapping.val[0] = GSWIP_VLAN_UNAWARE_PVID; + vlan_mapping.val[1] = BIT(port) | dsa_cpu_ports(priv->ds); + vlan_mapping.val[2] = 0; + err = gswip_pce_table_entry_write(priv, &vlan_mapping); + if (err) { + dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); + return err; + } + + return 0; +} + +static int gswip_port_setup(struct dsa_switch *ds, int port) +{ + struct gswip_priv *priv = ds->priv; + int err; + + if (!dsa_is_cpu_port(ds, port)) { + err = gswip_add_single_port_br(priv, port, true); + if (err) + return err; + } + + return 0; +} + +static int gswip_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phydev) +{ + struct gswip_priv *priv = ds->priv; + + if (!dsa_is_cpu_port(ds, port)) { + u32 mdio_phy = 0; + + if (phydev) + mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK; + + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_ADDR_MASK, + mdio_phy); + } + + /* RMON Counter Enable for port */ + regmap_write(priv->gswip, GSWIP_BM_PCFGp(port), GSWIP_BM_PCFG_CNTEN); + + /* enable port fetch/store dma & VLAN Modification */ + regmap_set_bits(priv->gswip, GSWIP_FDMA_PCTRLp(port), + GSWIP_FDMA_PCTRL_EN | GSWIP_FDMA_PCTRL_VLANMOD_BOTH); + regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); + + return 0; +} + +static void 
gswip_port_disable(struct dsa_switch *ds, int port) +{ + struct gswip_priv *priv = ds->priv; + + regmap_clear_bits(priv->gswip, GSWIP_FDMA_PCTRLp(port), + GSWIP_FDMA_PCTRL_EN); + regmap_clear_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); +} + +static int gswip_pce_load_microcode(struct gswip_priv *priv) +{ + int i; + int err; + + regmap_write_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_ADDR_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_MASK | + GSWIP_PCE_TBL_CTRL_OPMOD_ADWR, + GSWIP_PCE_TBL_CTRL_OPMOD_ADWR); + regmap_write(priv->gswip, GSWIP_PCE_TBL_MASK, 0); + + for (i = 0; i < priv->hw_info->pce_microcode_size; i++) { + regmap_write(priv->gswip, GSWIP_PCE_TBL_ADDR, i); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(0), + (*priv->hw_info->pce_microcode)[i].val_0); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(1), + (*priv->hw_info->pce_microcode)[i].val_1); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(2), + (*priv->hw_info->pce_microcode)[i].val_2); + regmap_write(priv->gswip, GSWIP_PCE_TBL_VAL(3), + (*priv->hw_info->pce_microcode)[i].val_3); + + /* start the table access: */ + regmap_set_bits(priv->gswip, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); + err = gswip_switch_r_timeout(priv, GSWIP_PCE_TBL_CTRL, + GSWIP_PCE_TBL_CTRL_BAS); + if (err) + return err; + } + + /* tell the switch that the microcode is loaded */ + regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_MC_VALID); + + return 0; +} + +static void gswip_port_commit_pvid(struct gswip_priv *priv, int port) +{ + struct dsa_port *dp = dsa_to_port(priv->ds, port); + struct net_device *br = dsa_port_bridge_dev_get(dp); + u32 vinr; + int idx; + + if (!dsa_port_is_user(dp)) + return; + + if (br) { + u16 pvid = GSWIP_VLAN_UNAWARE_PVID; + + if (br_vlan_enabled(br)) + br_vlan_get_pvid(br, &pvid); + + /* VLAN-aware bridge ports with no PVID will use Active VLAN + * index 0. The expectation is that this drops all untagged and + * VID-0 tagged ingress traffic. 
+ */ + idx = 0; + for (int i = priv->hw_info->max_ports; + i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == br && + priv->vlans[i].vid == pvid) { + idx = i; + break; + } + } + } else { + /* The Active VLAN table index as configured by + * gswip_add_single_port_br() + */ + idx = port + 1; + } + + vinr = idx ? GSWIP_PCE_VCTRL_VINR_ALL : GSWIP_PCE_VCTRL_VINR_TAGGED; + regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), + GSWIP_PCE_VCTRL_VINR, + FIELD_PREP(GSWIP_PCE_VCTRL_VINR, vinr)); + + /* Note that in GSWIP 2.2 VLAN mode the VID needs to be programmed + * directly instead of referencing the index in the Active VLAN Tablet. + * However, without the VLANMD bit (9) in PCE_GCTRL_1 (0x457) even + * GSWIP 2.2 and newer hardware maintain the GSWIP 2.1 behavior. + */ + regmap_write(priv->gswip, GSWIP_PCE_DEFPVID(port), idx); +} + +static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, + bool vlan_filtering, + struct netlink_ext_ack *extack) +{ + struct gswip_priv *priv = ds->priv; + + if (vlan_filtering) { + /* Use tag based VLAN */ + regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), + GSWIP_PCE_VCTRL_VSR | + GSWIP_PCE_VCTRL_UVR | + GSWIP_PCE_VCTRL_VIMR | + GSWIP_PCE_VCTRL_VEMR | + GSWIP_PCE_VCTRL_VID0, + GSWIP_PCE_VCTRL_UVR | + GSWIP_PCE_VCTRL_VIMR | + GSWIP_PCE_VCTRL_VEMR | + GSWIP_PCE_VCTRL_VID0); + regmap_clear_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), + GSWIP_PCE_PCTRL_0_TVM); + } else { + /* Use port based VLAN */ + regmap_write_bits(priv->gswip, GSWIP_PCE_VCTRL(port), + GSWIP_PCE_VCTRL_UVR | + GSWIP_PCE_VCTRL_VIMR | + GSWIP_PCE_VCTRL_VEMR | + GSWIP_PCE_VCTRL_VID0 | + GSWIP_PCE_VCTRL_VSR, + GSWIP_PCE_VCTRL_VSR); + regmap_set_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), + GSWIP_PCE_PCTRL_0_TVM); + } + + gswip_port_commit_pvid(priv, port); + + return 0; +} + +static int gswip_setup(struct dsa_switch *ds) +{ + unsigned int cpu_ports = dsa_cpu_ports(ds); + struct gswip_priv *priv = ds->priv; + struct dsa_port *cpu_dp; + int err, i; 
+ + regmap_write(priv->gswip, GSWIP_SWRES, GSWIP_SWRES_R0); + usleep_range(5000, 10000); + regmap_write(priv->gswip, GSWIP_SWRES, 0); + + /* disable port fetch/store dma on all ports */ + for (i = 0; i < priv->hw_info->max_ports; i++) { + gswip_port_disable(ds, i); + gswip_port_vlan_filtering(ds, i, false, NULL); + } + + /* enable Switch */ + regmap_set_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); + + err = gswip_pce_load_microcode(priv); + if (err) { + dev_err(priv->dev, "writing PCE microcode failed, %i\n", err); + return err; + } + + /* Default unknown Broadcast/Multicast/Unicast port maps */ + regmap_write(priv->gswip, GSWIP_PCE_PMAP1, cpu_ports); + regmap_write(priv->gswip, GSWIP_PCE_PMAP2, cpu_ports); + regmap_write(priv->gswip, GSWIP_PCE_PMAP3, cpu_ports); + + /* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an + * interoperability problem with this auto polling mechanism because + * their status registers think that the link is in a different state + * than it actually is. For the AR8030 it has the BMSR_ESTATEN bit set + * as well as ESTATUS_1000_TFULL and ESTATUS_1000_XFULL. This makes the + * auto polling state machine consider the link being negotiated with + * 1Gbit/s. Since the PHY itself is a Fast Ethernet RMII PHY this leads + * to the switch port being completely dead (RX and TX are both not + * working). + * Also with various other PHY / port combinations (PHY11G GPHY, PHY22F + * GPHY, external RGMII PEF7071/7072) any traffic would stop. Sometimes + * it would work fine for a few minutes to hours and then stop, on + * other device it would no traffic could be sent or received at all. + * Testing shows that when PHY auto polling is disabled these problems + * go away. 
+ */ + regmap_write(priv->mdio, GSWIP_MDIO_MDC_CFG0, 0x0); + + /* Configure the MDIO Clock 2.5 MHz */ + regmap_write_bits(priv->mdio, GSWIP_MDIO_MDC_CFG1, 0xff, 0x09); + + /* bring up the mdio bus */ + err = gswip_mdio(priv); + if (err) { + dev_err(priv->dev, "mdio bus setup failed\n"); + return err; + } + + /* Disable the xMII interface and clear it's isolation bit */ + for (i = 0; i < priv->hw_info->max_ports; i++) + gswip_mii_mask_cfg(priv, + GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE, + 0, i); + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + /* enable special tag insertion on cpu port */ + regmap_set_bits(priv->gswip, GSWIP_FDMA_PCTRLp(cpu_dp->index), + GSWIP_FDMA_PCTRL_STEN); + + /* accept special tag in ingress direction */ + regmap_set_bits(priv->gswip, + GSWIP_PCE_PCTRL_0p(cpu_dp->index), + GSWIP_PCE_PCTRL_0_INGRESS); + } + + regmap_set_bits(priv->gswip, GSWIP_BM_QUEUE_GCTRL, + GSWIP_BM_QUEUE_GCTRL_GL_MOD); + + /* VLAN aware Switching */ + regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_VLAN); + + /* Flush MAC Table */ + regmap_set_bits(priv->gswip, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_MTFL); + + err = gswip_switch_r_timeout(priv, GSWIP_PCE_GCTRL_0, + GSWIP_PCE_GCTRL_0_MTFL); + if (err) { + dev_err(priv->dev, "MAC flushing didn't finish\n"); + return err; + } + + ds->mtu_enforcement_ingress = true; + + return 0; +} + +static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds, + int port, + enum dsa_tag_protocol mp) +{ + struct gswip_priv *priv = ds->priv; + + return priv->hw_info->tag_protocol; +} + +static int gswip_vlan_active_create(struct gswip_priv *priv, + struct net_device *bridge, + int fid, u16 vid) +{ + struct gswip_pce_table_entry vlan_active = {0,}; + unsigned int max_ports = priv->hw_info->max_ports; + int idx = -1; + int err; + int i; + + /* Look for a free slot */ + for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { + if (!priv->vlans[i].bridge) { + idx = i; + break; + } + } + + if (idx == -1) + return 
-ENOSPC; + + if (fid == -1) + fid = idx; + + vlan_active.index = idx; + vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; + vlan_active.key[0] = vid; + vlan_active.val[0] = fid; + vlan_active.valid = true; + + err = gswip_pce_table_entry_write(priv, &vlan_active); + if (err) { + dev_err(priv->dev, "failed to write active VLAN: %d\n", err); + return err; + } + + priv->vlans[idx].bridge = bridge; + priv->vlans[idx].vid = vid; + priv->vlans[idx].fid = fid; + + return idx; +} + +static int gswip_vlan_active_remove(struct gswip_priv *priv, int idx) +{ + struct gswip_pce_table_entry vlan_active = {0,}; + int err; + + vlan_active.index = idx; + vlan_active.table = GSWIP_TABLE_ACTIVE_VLAN; + vlan_active.valid = false; + err = gswip_pce_table_entry_write(priv, &vlan_active); + if (err) + dev_err(priv->dev, "failed to delete active VLAN: %d\n", err); + priv->vlans[idx].bridge = NULL; + + return err; +} + +static int gswip_vlan_add(struct gswip_priv *priv, struct net_device *bridge, + int port, u16 vid, bool untagged, bool pvid, + bool vlan_aware) +{ + struct gswip_pce_table_entry vlan_mapping = {0,}; + unsigned int max_ports = priv->hw_info->max_ports; + unsigned int cpu_ports = dsa_cpu_ports(priv->ds); + bool active_vlan_created = false; + int fid = -1, idx = -1; + int i, err; + + /* Check if there is already a page for this bridge */ + for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == bridge) { + if (vlan_aware) { + if (fid != -1 && fid != priv->vlans[i].fid) + dev_err(priv->dev, "one bridge with multiple flow ids\n"); + fid = priv->vlans[i].fid; + } + if (priv->vlans[i].vid == vid) { + idx = i; + break; + } + } + } + + /* If this bridge is not programmed yet, add a Active VLAN table + * entry in a free slot and prepare the VLAN mapping table entry. 
+ */ + if (idx == -1) { + idx = gswip_vlan_active_create(priv, bridge, fid, vid); + if (idx < 0) + return idx; + active_vlan_created = true; + + vlan_mapping.index = idx; + vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; + } else { + /* Read the existing VLAN mapping entry from the switch */ + vlan_mapping.index = idx; + vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; + err = gswip_pce_table_entry_read(priv, &vlan_mapping); + if (err) { + dev_err(priv->dev, "failed to read VLAN mapping: %d\n", + err); + return err; + } + } + + /* VLAN ID byte, maps to the VLAN ID of vlan active table */ + vlan_mapping.val[0] = vid; + /* Update the VLAN mapping entry and write it to the switch */ + vlan_mapping.val[1] |= cpu_ports; + vlan_mapping.val[1] |= BIT(port); + if (vlan_aware) + vlan_mapping.val[2] |= cpu_ports; + if (untagged) + vlan_mapping.val[2] &= ~BIT(port); + else + vlan_mapping.val[2] |= BIT(port); + err = gswip_pce_table_entry_write(priv, &vlan_mapping); + if (err) { + dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); + /* In case an Active VLAN was creaetd delete it again */ + if (active_vlan_created) + gswip_vlan_active_remove(priv, idx); + return err; + } + + gswip_port_commit_pvid(priv, port); + + return 0; +} + +static int gswip_vlan_remove(struct gswip_priv *priv, + struct net_device *bridge, int port, + u16 vid) +{ + struct gswip_pce_table_entry vlan_mapping = {0,}; + unsigned int max_ports = priv->hw_info->max_ports; + int idx = -1; + int i; + int err; + + /* Check if there is already a page for this bridge */ + for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == bridge && + priv->vlans[i].vid == vid) { + idx = i; + break; + } + } + + if (idx == -1) { + dev_err(priv->dev, "Port %d cannot find VID %u of bridge %s\n", + port, vid, bridge ? 
bridge->name : "(null)"); + return -ENOENT; + } + + vlan_mapping.index = idx; + vlan_mapping.table = GSWIP_TABLE_VLAN_MAPPING; + err = gswip_pce_table_entry_read(priv, &vlan_mapping); + if (err) { + dev_err(priv->dev, "failed to read VLAN mapping: %d\n", err); + return err; + } + + vlan_mapping.val[1] &= ~BIT(port); + vlan_mapping.val[2] &= ~BIT(port); + err = gswip_pce_table_entry_write(priv, &vlan_mapping); + if (err) { + dev_err(priv->dev, "failed to write VLAN mapping: %d\n", err); + return err; + } + + /* In case all ports are removed from the bridge, remove the VLAN */ + if (!(vlan_mapping.val[1] & ~dsa_cpu_ports(priv->ds))) { + err = gswip_vlan_active_remove(priv, idx); + if (err) { + dev_err(priv->dev, "failed to write active VLAN: %d\n", + err); + return err; + } + } + + gswip_port_commit_pvid(priv, port); + + return 0; +} + +static int gswip_port_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct net_device *br = bridge.dev; + struct gswip_priv *priv = ds->priv; + int err; + + /* Set up the VLAN for VLAN-unaware bridging for this port, and remove + * it from the "single-port bridge" through which it was operating as + * standalone. + */ + err = gswip_vlan_add(priv, br, port, GSWIP_VLAN_UNAWARE_PVID, + true, true, false); + if (err) + return err; + + return gswip_add_single_port_br(priv, port, false); +} + +static void gswip_port_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) +{ + struct net_device *br = bridge.dev; + struct gswip_priv *priv = ds->priv; + + /* Add the port back to the "single-port bridge", and remove it from + * the VLAN-unaware PVID created for this bridge. 
+ */ + gswip_add_single_port_br(priv, port, true); + gswip_vlan_remove(priv, br, port, GSWIP_VLAN_UNAWARE_PVID); +} + +static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); + struct gswip_priv *priv = ds->priv; + unsigned int max_ports = priv->hw_info->max_ports; + int pos = max_ports; + int i, idx = -1; + + /* We only support VLAN filtering on bridges */ + if (!dsa_is_cpu_port(ds, port) && !bridge) + return -EOPNOTSUPP; + + /* Check if there is already a page for this VLAN */ + for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == bridge && + priv->vlans[i].vid == vlan->vid) { + idx = i; + break; + } + } + + /* If this VLAN is not programmed yet, we have to reserve + * one entry in the VLAN table. Make sure we start at the + * next position round. + */ + if (idx == -1) { + /* Look for a free slot */ + for (; pos < ARRAY_SIZE(priv->vlans); pos++) { + if (!priv->vlans[pos].bridge) { + idx = pos; + pos++; + break; + } + } + + if (idx == -1) { + NL_SET_ERR_MSG_MOD(extack, "No slot in VLAN table"); + return -ENOSPC; + } + } + + return 0; +} + +static int gswip_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); + struct gswip_priv *priv = ds->priv; + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + int err; + + if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) + return 0; + + err = gswip_port_vlan_prepare(ds, port, vlan, extack); + if (err) + return err; + + /* We have to receive all packets on the CPU port and should not + * do any VLAN filtering here. This is also called with bridge + * NULL and then we do not know for which bridge to configure + * this. 
+ */ + if (dsa_is_cpu_port(ds, port)) + return 0; + + return gswip_vlan_add(priv, bridge, port, vlan->vid, untagged, pvid, + true); +} + +static int gswip_port_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct net_device *bridge = dsa_port_bridge_dev_get(dsa_to_port(ds, port)); + struct gswip_priv *priv = ds->priv; + + if (vlan->vid == GSWIP_VLAN_UNAWARE_PVID) + return 0; + + /* We have to receive all packets on the CPU port and should not + * do any VLAN filtering here. This is also called with bridge + * NULL and then we do not know for which bridge to configure + * this. + */ + if (dsa_is_cpu_port(ds, port)) + return 0; + + return gswip_vlan_remove(priv, bridge, port, vlan->vid); +} + +static void gswip_port_fast_age(struct dsa_switch *ds, int port) +{ + struct gswip_priv *priv = ds->priv; + struct gswip_pce_table_entry mac_bridge = {0,}; + int i; + int err; + + for (i = 0; i < 2048; i++) { + mac_bridge.table = GSWIP_TABLE_MAC_BRIDGE; + mac_bridge.index = i; + + err = gswip_pce_table_entry_read(priv, &mac_bridge); + if (err) { + dev_err(priv->dev, "failed to read mac bridge: %d\n", + err); + return; + } + + if (!mac_bridge.valid) + continue; + + if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC) + continue; + + if (port != FIELD_GET(GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT, + mac_bridge.val[0])) + continue; + + mac_bridge.valid = false; + err = gswip_pce_table_entry_write(priv, &mac_bridge); + if (err) { + dev_err(priv->dev, "failed to write mac bridge: %d\n", + err); + return; + } + } +} + +static void gswip_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +{ + struct gswip_priv *priv = ds->priv; + u32 stp_state; + + switch (state) { + case BR_STATE_DISABLED: + regmap_clear_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); + return; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + stp_state = GSWIP_PCE_PCTRL_0_PSTATE_LISTEN; + break; + case BR_STATE_LEARNING: + stp_state = 
GSWIP_PCE_PCTRL_0_PSTATE_LEARNING; + break; + case BR_STATE_FORWARDING: + stp_state = GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING; + break; + default: + dev_err(priv->dev, "invalid STP state: %d\n", state); + return; + } + + regmap_set_bits(priv->gswip, GSWIP_SDMA_PCTRLp(port), + GSWIP_SDMA_PCTRL_EN); + regmap_write_bits(priv->gswip, GSWIP_PCE_PCTRL_0p(port), + GSWIP_PCE_PCTRL_0_PSTATE_MASK, + stp_state); +} + +static int gswip_port_fdb(struct dsa_switch *ds, int port, + struct net_device *bridge, const unsigned char *addr, + u16 vid, bool add) +{ + struct gswip_priv *priv = ds->priv; + struct gswip_pce_table_entry mac_bridge = {0,}; + unsigned int max_ports = priv->hw_info->max_ports; + int fid = -1; + int i; + int err; + + for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == bridge) { + fid = priv->vlans[i].fid; + break; + } + } + + if (fid == -1) { + dev_err(priv->dev, "no FID found for bridge %s\n", + bridge->name); + return -EINVAL; + } + + mac_bridge.table = GSWIP_TABLE_MAC_BRIDGE; + mac_bridge.key_mode = true; + mac_bridge.key[0] = addr[5] | (addr[4] << 8); + mac_bridge.key[1] = addr[3] | (addr[2] << 8); + mac_bridge.key[2] = addr[1] | (addr[0] << 8); + mac_bridge.key[3] = FIELD_PREP(GSWIP_TABLE_MAC_BRIDGE_KEY3_FID, fid); + mac_bridge.val[0] = add ? 
BIT(port) : 0; /* port map */ + mac_bridge.val[1] = GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC; + mac_bridge.valid = add; + + err = gswip_pce_table_entry_write(priv, &mac_bridge); + if (err) + dev_err(priv->dev, "failed to write mac bridge: %d\n", err); + + return err; +} + +static int gswip_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + if (db.type != DSA_DB_BRIDGE) + return -EOPNOTSUPP; + + return gswip_port_fdb(ds, port, db.bridge.dev, addr, vid, true); +} + +static int gswip_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + if (db.type != DSA_DB_BRIDGE) + return -EOPNOTSUPP; + + return gswip_port_fdb(ds, port, db.bridge.dev, addr, vid, false); +} + +static int gswip_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data) +{ + struct gswip_priv *priv = ds->priv; + struct gswip_pce_table_entry mac_bridge = {0,}; + unsigned char addr[ETH_ALEN]; + int i; + int err; + + for (i = 0; i < 2048; i++) { + mac_bridge.table = GSWIP_TABLE_MAC_BRIDGE; + mac_bridge.index = i; + + err = gswip_pce_table_entry_read(priv, &mac_bridge); + if (err) { + dev_err(priv->dev, + "failed to read mac bridge entry %d: %d\n", + i, err); + return err; + } + + if (!mac_bridge.valid) + continue; + + addr[5] = mac_bridge.key[0] & 0xff; + addr[4] = (mac_bridge.key[0] >> 8) & 0xff; + addr[3] = mac_bridge.key[1] & 0xff; + addr[2] = (mac_bridge.key[1] >> 8) & 0xff; + addr[1] = mac_bridge.key[2] & 0xff; + addr[0] = (mac_bridge.key[2] >> 8) & 0xff; + if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC) { + if (mac_bridge.val[0] & BIT(port)) { + err = cb(addr, 0, true, data); + if (err) + return err; + } + } else { + if (port == FIELD_GET(GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT, + mac_bridge.val[0])) { + err = cb(addr, 0, false, data); + if (err) + return err; + } + } + } + return 0; +} + +static int gswip_port_max_mtu(struct dsa_switch *ds, int port) +{ + /* 
Includes 8 bytes for special header. */ + return GSWIP_MAX_PACKET_LENGTH - VLAN_ETH_HLEN - ETH_FCS_LEN; +} + +static int gswip_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +{ + struct gswip_priv *priv = ds->priv; + + /* CPU port always has maximum mtu of user ports, so use it to set + * switch frame size, including 8 byte special header. + */ + if (dsa_is_cpu_port(ds, port)) { + new_mtu += 8; + regmap_write(priv->gswip, GSWIP_MAC_FLEN, + VLAN_ETH_HLEN + new_mtu + ETH_FCS_LEN); + } + + /* Enable MLEN for ports with non-standard MTUs, including the special + * header on the CPU port added above. + */ + if (new_mtu != ETH_DATA_LEN) + regmap_set_bits(priv->gswip, GSWIP_MAC_CTRL_2p(port), + GSWIP_MAC_CTRL_2_MLEN); + else + regmap_clear_bits(priv->gswip, GSWIP_MAC_CTRL_2p(port), + GSWIP_MAC_CTRL_2_MLEN); + + return 0; +} + +static void gswip_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) +{ + struct gswip_priv *priv = ds->priv; + + priv->hw_info->phylink_get_caps(ds, port, config); +} + +static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link) +{ + u32 mdio_phy; + + if (link) + mdio_phy = GSWIP_MDIO_PHY_LINK_UP; + else + mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN; + + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_LINK_MASK, mdio_phy); +} + +static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, + phy_interface_t interface) +{ + u32 mdio_phy = 0, mii_cfg = 0, mac_ctrl_0 = 0; + + switch (speed) { + case SPEED_10: + mdio_phy = GSWIP_MDIO_PHY_SPEED_M10; + + if (interface == PHY_INTERFACE_MODE_RMII) + mii_cfg = GSWIP_MII_CFG_RATE_M50; + else + mii_cfg = GSWIP_MII_CFG_RATE_M2P5; + + mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; + break; + + case SPEED_100: + mdio_phy = GSWIP_MDIO_PHY_SPEED_M100; + + if (interface == PHY_INTERFACE_MODE_RMII) + mii_cfg = GSWIP_MII_CFG_RATE_M50; + else + mii_cfg = GSWIP_MII_CFG_RATE_M25; + + mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; + break; 
+ + case SPEED_1000: + mdio_phy = GSWIP_MDIO_PHY_SPEED_G1; + + mii_cfg = GSWIP_MII_CFG_RATE_M125; + + mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_RGMII; + break; + } + + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy); + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port); + regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), + GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0); +} + +static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) +{ + u32 mac_ctrl_0, mdio_phy; + + if (duplex == DUPLEX_FULL) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_EN; + mdio_phy = GSWIP_MDIO_PHY_FDUP_EN; + } else { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_DIS; + mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS; + } + + regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), + GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy); +} + +static void gswip_port_set_pause(struct gswip_priv *priv, int port, + bool tx_pause, bool rx_pause) +{ + u32 mac_ctrl_0, mdio_phy; + + if (tx_pause && rx_pause) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RXTX; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | + GSWIP_MDIO_PHY_FCONRX_EN; + } else if (tx_pause) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_TX; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | + GSWIP_MDIO_PHY_FCONRX_DIS; + } else if (rx_pause) { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RX; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | + GSWIP_MDIO_PHY_FCONRX_EN; + } else { + mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_NONE; + mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | + GSWIP_MDIO_PHY_FCONRX_DIS; + } + + regmap_write_bits(priv->gswip, GSWIP_MAC_CTRL_0p(port), + GSWIP_MAC_CTRL_0_FCON_MASK, mac_ctrl_0); + regmap_write_bits(priv->mdio, GSWIP_MDIO_PHYp(port), + GSWIP_MDIO_PHY_FCONTX_MASK | GSWIP_MDIO_PHY_FCONRX_MASK, + mdio_phy); +} + +static void gswip_phylink_mac_config(struct phylink_config *config, + unsigned int mode, + const struct phylink_link_state *state) +{ + 
struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; + int port = dp->index; + u32 miicfg = 0; + + miicfg |= GSWIP_MII_CFG_LDCLKDIS; + + switch (state->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + return; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_INTERNAL: + miicfg |= GSWIP_MII_CFG_MODE_MIIM; + break; + case PHY_INTERFACE_MODE_REVMII: + miicfg |= GSWIP_MII_CFG_MODE_MIIP; + break; + case PHY_INTERFACE_MODE_RMII: + miicfg |= GSWIP_MII_CFG_MODE_RMIIM; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + miicfg |= GSWIP_MII_CFG_MODE_RGMII; + break; + case PHY_INTERFACE_MODE_GMII: + miicfg |= GSWIP_MII_CFG_MODE_GMII; + break; + default: + dev_err(dp->ds->dev, + "Unsupported interface: %d\n", state->interface); + return; + } + + gswip_mii_mask_cfg(priv, + GSWIP_MII_CFG_MODE_MASK | GSWIP_MII_CFG_RMII_CLK | + GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS, + miicfg, port); + + switch (state->interface) { + case PHY_INTERFACE_MODE_RGMII_ID: + gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK | + GSWIP_MII_PCDU_RXDLY_MASK, 0, port); + break; + case PHY_INTERFACE_MODE_RGMII_RXID: + gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_RXDLY_MASK, 0, port); + break; + case PHY_INTERFACE_MODE_RGMII_TXID: + gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK, 0, port); + break; + default: + break; + } +} + +static void gswip_phylink_mac_link_down(struct phylink_config *config, + unsigned int mode, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; + int port = dp->index; + + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, port); + + if (!dsa_port_is_cpu(dp)) + gswip_port_set_link(priv, port, false); +} + +static void gswip_phylink_mac_link_up(struct phylink_config *config, + struct 
phy_device *phydev, + unsigned int mode, + phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; + int port = dp->index; + + if (!dsa_port_is_cpu(dp)) { + gswip_port_set_link(priv, port, true); + gswip_port_set_speed(priv, port, speed, interface); + gswip_port_set_duplex(priv, port, duplex); + gswip_port_set_pause(priv, port, tx_pause, rx_pause); + } + + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, GSWIP_MII_CFG_EN, port); +} + +static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(gswip_rmon_cnt); i++) + ethtool_puts(&data, gswip_rmon_cnt[i].name); +} + +static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, + u32 index) +{ + u32 result, val; + int err; + + regmap_write(priv->gswip, GSWIP_BM_RAM_ADDR, index); + regmap_write_bits(priv->gswip, GSWIP_BM_RAM_CTRL, + GSWIP_BM_RAM_CTRL_ADDR_MASK | GSWIP_BM_RAM_CTRL_OPMOD | + GSWIP_BM_RAM_CTRL_BAS, + table | GSWIP_BM_RAM_CTRL_BAS); + + err = gswip_switch_r_timeout(priv, GSWIP_BM_RAM_CTRL, + GSWIP_BM_RAM_CTRL_BAS); + if (err) { + dev_err(priv->dev, "timeout while reading table: %u, index: %u\n", + table, index); + return 0; + } + + regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(0), &result); + regmap_read(priv->gswip, GSWIP_BM_RAM_VAL(1), &val); + result |= val << 16; + + return result; +} + +static void gswip_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct gswip_priv *priv = ds->priv; + const struct gswip_rmon_cnt_desc *rmon_cnt; + int i; + u64 high; + + for (i = 0; i < ARRAY_SIZE(gswip_rmon_cnt); i++) { + rmon_cnt = &gswip_rmon_cnt[i]; + + data[i] = gswip_bcm_ram_entry_read(priv, port, + rmon_cnt->offset); + if (rmon_cnt->size == 2) { + high = gswip_bcm_ram_entry_read(priv, port, + rmon_cnt->offset + 1); + data[i] |= high << 
32; + } + } +} + +static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + if (sset != ETH_SS_STATS) + return 0; + + return ARRAY_SIZE(gswip_rmon_cnt); +} + +static struct phylink_pcs *gswip_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; + + if (priv->hw_info->mac_select_pcs) + return priv->hw_info->mac_select_pcs(config, interface); + + return NULL; +} + +static const struct phylink_mac_ops gswip_phylink_mac_ops = { + .mac_config = gswip_phylink_mac_config, + .mac_link_down = gswip_phylink_mac_link_down, + .mac_link_up = gswip_phylink_mac_link_up, + .mac_select_pcs = gswip_phylink_mac_select_pcs, +}; + +static const struct dsa_switch_ops gswip_switch_ops = { + .get_tag_protocol = gswip_get_tag_protocol, + .setup = gswip_setup, + .port_setup = gswip_port_setup, + .port_enable = gswip_port_enable, + .port_disable = gswip_port_disable, + .port_bridge_join = gswip_port_bridge_join, + .port_bridge_leave = gswip_port_bridge_leave, + .port_fast_age = gswip_port_fast_age, + .port_vlan_filtering = gswip_port_vlan_filtering, + .port_vlan_add = gswip_port_vlan_add, + .port_vlan_del = gswip_port_vlan_del, + .port_stp_state_set = gswip_port_stp_state_set, + .port_fdb_add = gswip_port_fdb_add, + .port_fdb_del = gswip_port_fdb_del, + .port_fdb_dump = gswip_port_fdb_dump, + .port_change_mtu = gswip_port_change_mtu, + .port_max_mtu = gswip_port_max_mtu, + .phylink_get_caps = gswip_phylink_get_caps, + .get_strings = gswip_get_strings, + .get_ethtool_stats = gswip_get_ethtool_stats, + .get_sset_count = gswip_get_sset_count, +}; + +void gswip_disable_switch(struct gswip_priv *priv) +{ + regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); +} +EXPORT_SYMBOL_GPL(gswip_disable_switch); + +static int gswip_validate_cpu_port(struct dsa_switch *ds) +{ + struct gswip_priv *priv = ds->priv; + struct dsa_port *cpu_dp; + 
int cpu_port = -1; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + if (cpu_port != -1) + return dev_err_probe(ds->dev, -EINVAL, + "only a single CPU port is supported\n"); + + cpu_port = cpu_dp->index; + } + + if (cpu_port == -1) + return dev_err_probe(ds->dev, -EINVAL, "no CPU port defined\n"); + + if (BIT(cpu_port) & ~priv->hw_info->allowed_cpu_ports) + return dev_err_probe(ds->dev, -EINVAL, + "unsupported CPU port defined\n"); + + return 0; +} + +int gswip_probe_common(struct gswip_priv *priv, u32 version) +{ + int err; + + mutex_init(&priv->pce_table_lock); + + priv->ds = devm_kzalloc(priv->dev, sizeof(*priv->ds), GFP_KERNEL); + if (!priv->ds) + return -ENOMEM; + + priv->ds->dev = priv->dev; + priv->ds->num_ports = priv->hw_info->max_ports; + priv->ds->ops = &gswip_switch_ops; + priv->ds->phylink_mac_ops = &gswip_phylink_mac_ops; + priv->ds->priv = priv; + + /* The hardware has the 'major/minor' version bytes in the wrong order + * preventing numerical comparisons. Construct a 16-bit unsigned integer + * having the REV field as most significant byte and the MOD field as + * least significant byte. This is effectively swapping the two bytes of + * the version variable, but other than using swab16 it doesn't affect + * the source variable. 
+ */ + priv->version = GSWIP_VERSION_REV(version) << 8 | + GSWIP_VERSION_MOD(version); + + err = dsa_register_switch(priv->ds); + if (err) + return dev_err_probe(priv->dev, err, "dsa switch registration failed\n"); + + err = gswip_validate_cpu_port(priv->ds); + if (err) + goto disable_switch; + + dev_info(priv->dev, "probed GSWIP version %lx mod %lx\n", + GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version)); + + return 0; + +disable_switch: + gswip_disable_switch(priv); + dsa_unregister_switch(priv->ds); + + return err; +} +EXPORT_SYMBOL_GPL(gswip_probe_common); + +MODULE_AUTHOR("Hauke Mehrtens "); +MODULE_AUTHOR("Daniel Golle "); +MODULE_DESCRIPTION("Lantiq / Intel / MaxLinear GSWIP common functions"); +MODULE_LICENSE("GPL"); From a7d4b05f9d748fef80de89d2dd650d5d2ae2f590 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:18:49 +0000 Subject: [PATCH 615/867] net: dsa: lantiq_gswip: support enable/disable learning Switch API 2.2 or later supports enabling or disabling learning on each port. Implement support for BR_LEARNING bridge flag and announce support for BR_LEARNING on GSWIP 2.2 or later. 
Signed-off-by: Daniel Golle Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/0aa4621e01c998378ad5812464bc17d23aa3bf62.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.h | 3 ++ drivers/net/dsa/lantiq/lantiq_gswip_common.c | 43 ++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index d86290db19b41..fb7d2c02bde90 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -157,6 +157,9 @@ #define GSWIP_PCE_PCTRL_0_PSTATE_LEARNING 0x3 #define GSWIP_PCE_PCTRL_0_PSTATE_FORWARDING 0x7 #define GSWIP_PCE_PCTRL_0_PSTATE_MASK GENMASK(2, 0) +/* Ethernet Switch PCE Port Control Register 3 */ +#define GSWIP_PCE_PCTRL_3p(p) (0x483 + ((p) * 0xA)) +#define GSWIP_PCE_PCTRL_3_LNDIS BIT(15) /* Learning Disable */ #define GSWIP_PCE_VCTRL(p) (0x485 + ((p) * 0xA)) #define GSWIP_PCE_VCTRL_UVR BIT(0) /* Unknown VLAN Rule */ #define GSWIP_PCE_VCTRL_VINR GENMASK(2, 1) /* VLAN Ingress Tag Rule */ diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index a0e361622acb0..f130bf6642a7b 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -403,6 +403,47 @@ static int gswip_add_single_port_br(struct gswip_priv *priv, int port, bool add) return 0; } +static int gswip_port_set_learning(struct gswip_priv *priv, int port, + bool enable) +{ + if (!GSWIP_VERSION_GE(priv, GSWIP_VERSION_2_2)) + return -EOPNOTSUPP; + + /* learning disable bit */ + return regmap_update_bits(priv->gswip, GSWIP_PCE_PCTRL_3p(port), + GSWIP_PCE_PCTRL_3_LNDIS, + enable ? 
0 : GSWIP_PCE_PCTRL_3_LNDIS); +} + +static int gswip_port_pre_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + struct gswip_priv *priv = ds->priv; + unsigned long supported = 0; + + if (GSWIP_VERSION_GE(priv, GSWIP_VERSION_2_2)) + supported |= BR_LEARNING; + + if (flags.mask & ~supported) + return -EINVAL; + + return 0; +} + +static int gswip_port_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + struct gswip_priv *priv = ds->priv; + + if (flags.mask & BR_LEARNING) + return gswip_port_set_learning(priv, port, + !!(flags.val & BR_LEARNING)); + + return 0; +} + static int gswip_port_setup(struct dsa_switch *ds, int port) { struct gswip_priv *priv = ds->priv; @@ -1521,6 +1562,8 @@ static const struct dsa_switch_ops gswip_switch_ops = { .port_setup = gswip_port_setup, .port_enable = gswip_port_enable, .port_disable = gswip_port_disable, + .port_pre_bridge_flags = gswip_port_pre_bridge_flags, + .port_bridge_flags = gswip_port_bridge_flags, .port_bridge_join = gswip_port_bridge_join, .port_bridge_leave = gswip_port_bridge_leave, .port_fast_age = gswip_port_fast_age, From 9ec1fc0bf2b0ee89b3195d1c1db2cdb471a8c85d Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:19:00 +0000 Subject: [PATCH 616/867] net: dsa: lantiq_gswip: support Energy Efficient Ethernet Introduce support for Energy Efficient Ethernet (EEE) on hardware version 2.2 or later. 
Signed-off-by: Daniel Golle Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/753e45acb25e185689ca1afd8a9bd0c199d1c15b.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.h | 7 +++ drivers/net/dsa/lantiq/lantiq_gswip_common.c | 47 ++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index fb7d2c02bde90..56de869fc4720 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -2,6 +2,7 @@ #ifndef __LANTIQ_GSWIP_H #define __LANTIQ_GSWIP_H +#include #include #include #include @@ -193,6 +194,12 @@ #define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC)) #define GSWIP_MAC_CTRL_2_LCHKL BIT(2) /* Frame Length Check Long Enable */ #define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Lnegth */ +#define GSWIP_MAC_CTRL_4p(p) (0x907 + ((p) * 0xC)) +#define GSWIP_MAC_CTRL_4_LPIEN BIT(7) /* LPI Mode Enable */ +#define GSWIP_MAC_CTRL_4_GWAIT_MASK GENMASK(14, 8) /* LPI Wait Time 1G */ +#define GSWIP_MAC_CTRL_4_GWAIT(t) u16_encode_bits((t), GSWIP_MAC_CTRL_4_GWAIT_MASK) +#define GSWIP_MAC_CTRL_4_WAIT_MASK GENMASK(6, 0) /* LPI Wait Time 100M */ +#define GSWIP_MAC_CTRL_4_WAIT(t) u16_encode_bits((t), GSWIP_MAC_CTRL_4_WAIT_MASK) /* Ethernet Switch Fetch DMA Port Control Register */ #define GSWIP_FDMA_PCTRLp(p) (0xA80 + ((p) * 0x6)) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index f130bf6642a7b..092187603dea0 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -1537,6 +1537,49 @@ static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset) return ARRAY_SIZE(gswip_rmon_cnt); } +static int gswip_set_mac_eee(struct dsa_switch *ds, int port, + struct ethtool_keee *e) +{ + if (e->tx_lpi_timer > 0x7f) + return -EINVAL; + + return 0; +} + +static void 
gswip_phylink_mac_disable_tx_lpi(struct phylink_config *config) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; + + regmap_clear_bits(priv->gswip, GSWIP_MAC_CTRL_4p(dp->index), + GSWIP_MAC_CTRL_4_LPIEN); +} + +static int gswip_phylink_mac_enable_tx_lpi(struct phylink_config *config, + u32 timer, bool tx_clock_stop) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *priv = dp->ds->priv; + + return regmap_update_bits(priv->gswip, GSWIP_MAC_CTRL_4p(dp->index), + GSWIP_MAC_CTRL_4_LPIEN | + GSWIP_MAC_CTRL_4_GWAIT_MASK | + GSWIP_MAC_CTRL_4_WAIT_MASK, + GSWIP_MAC_CTRL_4_LPIEN | + GSWIP_MAC_CTRL_4_GWAIT(timer) | + GSWIP_MAC_CTRL_4_WAIT(timer)); +} + +static bool gswip_support_eee(struct dsa_switch *ds, int port) +{ + struct gswip_priv *priv = ds->priv; + + if (GSWIP_VERSION_GE(priv, GSWIP_VERSION_2_2)) + return true; + + return false; +} + static struct phylink_pcs *gswip_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) { @@ -1553,6 +1596,8 @@ static const struct phylink_mac_ops gswip_phylink_mac_ops = { .mac_config = gswip_phylink_mac_config, .mac_link_down = gswip_phylink_mac_link_down, .mac_link_up = gswip_phylink_mac_link_up, + .mac_disable_tx_lpi = gswip_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = gswip_phylink_mac_enable_tx_lpi, .mac_select_pcs = gswip_phylink_mac_select_pcs, }; @@ -1580,6 +1625,8 @@ static const struct dsa_switch_ops gswip_switch_ops = { .get_strings = gswip_get_strings, .get_ethtool_stats = gswip_get_ethtool_stats, .get_sset_count = gswip_get_sset_count, + .set_mac_eee = gswip_set_mac_eee, + .support_eee = gswip_support_eee, }; void gswip_disable_switch(struct gswip_priv *priv) From 3e5ef3b1709afecd86886bc8eb2425e39b09196d Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:19:10 +0000 Subject: [PATCH 617/867] net: dsa: lantiq_gswip: set link parameters also for CPU port On standalone switch ICs the link parameters of 
the CPU port need to be set up just like user ports. The distinction in the driver to not carry out link parameter setup for the CPU port does make sense for in-SoC switches on which the CPU port is internally connected to the SoC's Ethernet MAC. Set link parameters also for the CPU port unless it is an internal interface. Note that the internal TP PHYs anyway cannot be used as CPU ports, hence it doesn't matter that they are now also covered by that condition. Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/07c6b8d3a12296123be5e5938b454fc620f819e6.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index 092187603dea0..0ac87eb23bb57 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -1459,7 +1459,7 @@ static void gswip_phylink_mac_link_up(struct phylink_config *config, struct gswip_priv *priv = dp->ds->priv; int port = dp->index; - if (!dsa_port_is_cpu(dp)) { + if (!dsa_port_is_cpu(dp) || interface != PHY_INTERFACE_MODE_INTERNAL) { gswip_port_set_link(priv, port, true); gswip_port_set_speed(priv, port, speed, interface); gswip_port_set_duplex(priv, port, duplex); From 0c56a98560c16d118d35e58a25541a3ac2717b78 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:19:25 +0000 Subject: [PATCH 618/867] net: dsa: lantiq_gswip: define and use GSWIP_TABLE_MAC_BRIDGE_VAL1_VALID When adding FDB entries to the MAC bridge table on GSWIP 2.2 or later it is needed to set an (undocumented) bit to mark the entry as valid. If this bit isn't set for entries in the MAC bridge table, then those entries won't be considered as valid MAC addresses. 
Signed-off-by: Daniel Golle Link: https://patch.msgid.link/e02fe0d946c98920bc55b5f389a8f56382aae7df.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.h | 1 + drivers/net/dsa/lantiq/lantiq_gswip_common.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 56de869fc4720..42000954d8427 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -224,6 +224,7 @@ #define GSWIP_TABLE_MAC_BRIDGE_KEY3_FID GENMASK(5, 0) /* Filtering identifier */ #define GSWIP_TABLE_MAC_BRIDGE_VAL0_PORT GENMASK(7, 4) /* Port on learned entries */ #define GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC BIT(0) /* Static, non-aging entry */ +#define GSWIP_TABLE_MAC_BRIDGE_VAL1_VALID BIT(1) /* Valid bit */ #define XRX200_GPHY_FW_ALIGN (16 * 1024) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index 0ac87eb23bb57..fdfc265b4c737 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -1149,7 +1149,12 @@ static int gswip_port_fdb(struct dsa_switch *ds, int port, mac_bridge.key[2] = addr[1] | (addr[0] << 8); mac_bridge.key[3] = FIELD_PREP(GSWIP_TABLE_MAC_BRIDGE_KEY3_FID, fid); mac_bridge.val[0] = add ? BIT(port) : 0; /* port map */ - mac_bridge.val[1] = GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC; + if (GSWIP_VERSION_GE(priv, GSWIP_VERSION_2_2_ETC)) + mac_bridge.val[1] = add ? 
(GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC | + GSWIP_TABLE_MAC_BRIDGE_VAL1_VALID) : 0; + else + mac_bridge.val[1] = GSWIP_TABLE_MAC_BRIDGE_VAL1_STATIC; + mac_bridge.valid = add; err = gswip_pce_table_entry_write(priv, &mac_bridge); From e836824116b5644eb681777cd58cba915f4cbe75 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:19:34 +0000 Subject: [PATCH 619/867] dt-bindings: net: dsa: lantiq,gswip: add MaxLinear RMII refclk output property Add support for the maxlinear,rmii-refclk-out boolean property on port nodes to configure the RMII reference clock to be an output rather than an input. This property is only applicable for ports in RMII mode and allows the switch to provide the reference clock for RMII-connected PHYs instead of requiring an external clock source. This corresponds to the driver changes that read this Device Tree property to configure the RMII clock direction. Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/9813bb916ecce9bae366e6c50c081014fe5371ea.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/lantiq,gswip.yaml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml b/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml index f3154b19af78a..809d0e9d0a15b 100644 --- a/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml +++ b/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml @@ -6,8 +6,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Lantiq GSWIP Ethernet switches -allOf: - - $ref: dsa.yaml#/$defs/ethernet-ports +$ref: dsa.yaml# maintainers: - Hauke Mehrtens @@ -91,6 +90,21 @@ properties: additionalProperties: false +patternProperties: + "^(ethernet-)?ports$": + type: object + patternProperties: + "^(ethernet-)?port@[0-6]$": + $ref: dsa-port.yaml# + unevaluatedProperties: false + + 
properties: + maxlinear,rmii-refclk-out: + type: boolean + description: + Configure the RMII reference clock to be a clock output + rather than an input. Only applicable for RMII mode. + required: - compatible - reg From 319fd7e9d446bb90469a82f876e78785f6da0bc5 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:19:47 +0000 Subject: [PATCH 620/867] net: dsa: lantiq_gswip: add vendor property to setup MII refclk output Read boolean Device Tree property "maxlinear,rmii-refclk-out" and switch the RMII reference clock to be a clock output rather than an input if it is set. Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Link: https://patch.msgid.link/947d14970f74f760e4a60c777aabee64e7e4f356.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index fdfc265b4c737..7b3debd45b916 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -1402,6 +1402,8 @@ static void gswip_phylink_mac_config(struct phylink_config *config, break; case PHY_INTERFACE_MODE_RMII: miicfg |= GSWIP_MII_CFG_MODE_RMIIM; + if (of_property_read_bool(dp->dn, "maxlinear,rmii-refclk-out")) + miicfg |= GSWIP_MII_CFG_RMII_CLK; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: From bea0c17786116141aaf3980dc73758e3cc0d2748 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:19:58 +0000 Subject: [PATCH 621/867] dt-bindings: net: dsa: lantiq,gswip: add support for MII delay properties Add support for standard tx-internal-delay-ps and rx-internal-delay-ps properties on port nodes to allow fine-tuning of RGMII clock delays. 
The GSWIP switch hardware supports delay values in 500 picosecond increments from 0 to 3500 picoseconds, with a post-reset default of 2000 picoseconds for both TX and RX delays. The driver currently sets the delay to 0 in case the PHY is setup to carry out the delay by the corresponding interface modes ("rgmii-id", "rgmii-rxid", "rgmii-txid"). This corresponds to the driver changes that allow adjusting MII delays using Device Tree properties instead of relying solely on the PHY interface mode. Signed-off-by: Daniel Golle Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/9e007d4f85c2c6d69e0b91f3663d99e0f6fc8eac.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/lantiq,gswip.yaml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml b/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml index 809d0e9d0a15b..929f6f8e4534b 100644 --- a/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml +++ b/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml @@ -104,6 +104,20 @@ patternProperties: description: Configure the RMII reference clock to be a clock output rather than an input. Only applicable for RMII mode. + tx-internal-delay-ps: + enum: [0, 500, 1000, 1500, 2000, 2500, 3000, 3500] + description: + RGMII TX Clock Delay defined in pico seconds. + The delay lines adjust the MII clock vs. data timing. + If this property is not present the delay is determined by + the interface mode. + rx-internal-delay-ps: + enum: [0, 500, 1000, 1500, 2000, 2500, 3000, 3500] + description: + RGMII RX Clock Delay defined in pico seconds. + The delay lines adjust the MII clock vs. data timing. + If this property is not present the delay is determined by + the interface mode. 
required: - compatible @@ -127,8 +141,10 @@ examples: port@0 { reg = <0>; label = "lan3"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&phy0>; + tx-internal-delay-ps = <2000>; + rx-internal-delay-ps = <2000>; }; port@1 { From cdef8e47b638bcc35b0e05f48269a2667ec665da Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:20:05 +0000 Subject: [PATCH 622/867] net: dsa: lantiq_gswip: allow adjusting MII delays Currently the MII clk vs. data delay is configured based on the PHY interface mode. In addition to that add support for setting up MII delays using the standard Device Tree properties 'tx-internal-delay-ps' and 'rx-internal-delay-ps', using the values determined by the PHY interface mode as default to maintain backward compatibility with legacy device trees. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/37203e831cff87dc46e5ef9e8cbd68fb8689773d.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/lantiq_gswip.h | 4 + drivers/net/dsa/lantiq/lantiq_gswip_common.c | 94 ++++++++++++-------- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 42000954d8427..0c32ec85e1272 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -82,6 +82,10 @@ #define GSWIP_MII_PCDU5 0x05 #define GSWIP_MII_PCDU_TXDLY_MASK GENMASK(2, 0) #define GSWIP_MII_PCDU_RXDLY_MASK GENMASK(9, 7) +#define GSWIP_MII_PCDU_TXDLY(x) u16_encode_bits(((x) / 500), GSWIP_MII_PCDU_TXDLY_MASK) +#define GSWIP_MII_PCDU_RXDLY(x) u16_encode_bits(((x) / 500), GSWIP_MII_PCDU_RXDLY_MASK) +#define GSWIP_MII_PCDU_RXDLY_DEFAULT 2000 /* picoseconds */ +#define GSWIP_MII_PCDU_TXDLY_DEFAULT 2000 /* picoseconds */ /* GSWIP Core Registers */ #define GSWIP_SWRES 0x000 diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index 7b3debd45b916..122ccea4057bb 
100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -130,30 +130,6 @@ static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 mask, u32 set, set); } -static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 mask, u32 set, - int port) -{ - int reg_port; - - /* MII_PCDU register only exists for MII ports */ - if (!(priv->hw_info->mii_ports & BIT(port))) - return; - - reg_port = port + priv->hw_info->mii_port_reg_offset; - - switch (reg_port) { - case 0: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU0, mask, set); - break; - case 1: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU1, mask, set); - break; - case 5: - regmap_write_bits(priv->mii, GSWIP_MII_PCDU5, mask, set); - break; - } -} - static int gswip_mdio_poll(struct gswip_priv *priv) { u32 ctrl; @@ -622,6 +598,61 @@ static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } +static void gswip_mii_delay_setup(struct gswip_priv *priv, struct dsa_port *dp, + phy_interface_t interface) +{ + u32 tx_delay = GSWIP_MII_PCDU_TXDLY_DEFAULT; + u32 rx_delay = GSWIP_MII_PCDU_RXDLY_DEFAULT; + struct device_node *port_dn = dp->dn; + u16 mii_pcdu_reg; + + /* As MII_PCDU registers only exist for MII ports, silently return + * unless the port is an MII port + */ + if (!(priv->hw_info->mii_ports & BIT(dp->index))) + return; + + switch (dp->index + priv->hw_info->mii_port_reg_offset) { + case 0: + mii_pcdu_reg = GSWIP_MII_PCDU0; + break; + case 1: + mii_pcdu_reg = GSWIP_MII_PCDU1; + break; + case 5: + mii_pcdu_reg = GSWIP_MII_PCDU5; + break; + default: + return; + } + + /* legacy code to set default delays according to the interface mode */ + switch (interface) { + case PHY_INTERFACE_MODE_RGMII_ID: + tx_delay = 0; + rx_delay = 0; + break; + case PHY_INTERFACE_MODE_RGMII_RXID: + rx_delay = 0; + break; + case PHY_INTERFACE_MODE_RGMII_TXID: + tx_delay = 0; + break; + default: + break; + } + + /* allow settings delays using device tree properties */ 
+ of_property_read_u32(port_dn, "rx-internal-delay-ps", &rx_delay); + of_property_read_u32(port_dn, "tx-internal-delay-ps", &tx_delay); + + regmap_write_bits(priv->mii, mii_pcdu_reg, + GSWIP_MII_PCDU_TXDLY_MASK | + GSWIP_MII_PCDU_RXDLY_MASK, + GSWIP_MII_PCDU_TXDLY(tx_delay) | + GSWIP_MII_PCDU_RXDLY(rx_delay)); +} + static int gswip_setup(struct dsa_switch *ds) { unsigned int cpu_ports = dsa_cpu_ports(ds); @@ -1425,20 +1456,7 @@ static void gswip_phylink_mac_config(struct phylink_config *config, GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS, miicfg, port); - switch (state->interface) { - case PHY_INTERFACE_MODE_RGMII_ID: - gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK | - GSWIP_MII_PCDU_RXDLY_MASK, 0, port); - break; - case PHY_INTERFACE_MODE_RGMII_RXID: - gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_RXDLY_MASK, 0, port); - break; - case PHY_INTERFACE_MODE_RGMII_TXID: - gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK, 0, port); - break; - default: - break; - } + gswip_mii_delay_setup(priv, dp, state->interface); } static void gswip_phylink_mac_link_down(struct phylink_config *config, From e1bb4b36a7ae0915f16abb5fd7073d2547235fa7 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:20:12 +0000 Subject: [PATCH 623/867] dt-bindings: net: dsa: lantiq,gswip: add support for MaxLinear GSW1xx switches Extend the Lantiq GSWIP device tree binding to also cover MaxLinear GSW1xx switches which are based on the same hardware IP but connected via MDIO instead of being memory-mapped. Add compatible strings for MaxLinear GSW120, GSW125, GSW140, GSW141, and GSW145 switches and adjust the schema to handle the different connection methods with conditional properties. Add MaxLinear GSW125 example showing MDIO-connected configuration. 
Signed-off-by: Daniel Golle Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/fc96f1dedb2b418a63e69960356dde7f6eb86424.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/lantiq,gswip.yaml | 128 +++++++++++++++++- 1 file changed, 123 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml b/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml index 929f6f8e4534b..205b683849a53 100644 --- a/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml +++ b/Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml @@ -4,7 +4,12 @@ $id: http://devicetree.org/schemas/net/dsa/lantiq,gswip.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Lantiq GSWIP Ethernet switches +title: Lantiq GSWIP and MaxLinear GSW1xx Ethernet switches + +description: + Lantiq GSWIP and MaxLinear GSW1xx switches share the same hardware IP. + Lantiq switches are embedded in SoCs and accessed via memory-mapped I/O, + while MaxLinear switches are standalone ICs connected via MDIO. 
$ref: dsa.yaml# @@ -17,9 +22,14 @@ properties: - lantiq,xrx200-gswip - lantiq,xrx300-gswip - lantiq,xrx330-gswip + - maxlinear,gsw120 + - maxlinear,gsw125 + - maxlinear,gsw140 + - maxlinear,gsw141 + - maxlinear,gsw145 reg: - minItems: 3 + minItems: 1 maxItems: 3 reg-names: @@ -36,9 +46,6 @@ properties: compatible: const: lantiq,xrx200-mdio - required: - - compatible - gphy-fw: type: object properties: @@ -123,6 +130,30 @@ required: - compatible - reg +allOf: + - if: + properties: + compatible: + contains: + enum: + - lantiq,xrx200-gswip + - lantiq,xrx300-gswip + - lantiq,xrx330-gswip + then: + properties: + reg: + minItems: 3 + maxItems: 3 + mdio: + required: + - compatible + else: + properties: + reg: + maxItems: 1 + reg-names: false + gphy-fw: false + unevaluatedProperties: false examples: @@ -230,3 +261,90 @@ examples: }; }; }; + + - | + #include <dt-bindings/leds/common.h> + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch@1f { + compatible = "maxlinear,gsw125"; + reg = <0x1f>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + phy-handle = <&switchphy0>; + phy-mode = "internal"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&switchphy1>; + phy-mode = "internal"; + }; + + port@4 { + reg = <4>; + label = "wan"; + phy-mode = "1000base-x"; + managed = "in-band-status"; + }; + + port@5 { + reg = <5>; + phy-mode = "rgmii-id"; + tx-internal-delay-ps = <2000>; + rx-internal-delay-ps = <2000>; + ethernet = <&eth0>; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switchphy0: switchphy@0 { + reg = <0>; + + leds { + #address-cells = <1>; + #size-cells = <0>; + + led@0 { + reg = <0>; + color = ; + function = LED_FUNCTION_LAN; + }; + }; + }; + + switchphy1: switchphy@1 { + reg = <1>; + + leds { + #address-cells = <1>; + #size-cells = <0>; + + led@0 { + reg = <0>; + color = ; + function = LED_FUNCTION_LAN; + }; + }; + }; + }; + }; + }; From
c6230446b1a6f3c91effafd99f604de455da52e5 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:20:20 +0000 Subject: [PATCH 624/867] net: dsa: add tagging driver for MaxLinear GSW1xx switch family Add support for a new DSA tagging protocol driver for the MaxLinear GSW1xx switch family. The GSW1xx switches use a proprietary 8-byte special tag inserted between the source MAC address and the EtherType field to indicate the source and destination ports for frames traversing the CPU port. Implement the tag handling logic to insert the special tag on transmit and parse it on receive. Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Link: https://patch.msgid.link/0e973ebfd9433c30c96f50670da9e9449a0d98f2.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +- include/net/dsa.h | 2 + include/uapi/linux/if_ether.h | 1 + net/dsa/Kconfig | 8 +++ net/dsa/Makefile | 1 + net/dsa/tag_mxl-gsw1xx.c | 116 ++++++++++++++++++++++++++++++++++ 6 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 net/dsa/tag_mxl-gsw1xx.c diff --git a/MAINTAINERS b/MAINTAINERS index 12cd8a5ab2748..0dc4aa37d9034 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14053,7 +14053,7 @@ F: tools/testing/selftests/landlock/ K: landlock K: LANDLOCK -LANTIQ / INTEL Ethernet drivers +LANTIQ / MAXLINEAR / INTEL Ethernet DSA drivers M: Hauke Mehrtens L: netdev@vger.kernel.org S: Maintained @@ -14061,6 +14061,7 @@ F: Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml F: drivers/net/dsa/lantiq/* F: drivers/net/ethernet/lantiq_xrx200.c F: net/dsa/tag_gswip.c +F: net/dsa/tag_mxl-gsw1xx.c LANTIQ MIPS ARCHITECTURE M: John Crispin diff --git a/include/net/dsa.h b/include/net/dsa.h index 67762fdaf3c7a..2df2e2ead9a81 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -56,6 +56,7 @@ struct tc_action; #define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 #define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29 #define 
DSA_TAG_PROTO_YT921X_VALUE 30 +#define DSA_TAG_PROTO_MXL_GSW1XX_VALUE 31 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -89,6 +90,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, DSA_TAG_PROTO_YT921X = DSA_TAG_PROTO_YT921X_VALUE, + DSA_TAG_PROTO_MXL_GSW1XX = DSA_TAG_PROTO_MXL_GSW1XX_VALUE, }; struct dsa_switch; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index cfd200c87e5ea..2c93b7b731c8f 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -92,6 +92,7 @@ #define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ +#define ETH_P_MXLGSW 0x88C3 /* MaxLinear GSW DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 6b94028b1fcc2..f86b30742122f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -104,6 +104,14 @@ config NET_DSA_TAG_MTK Say Y or M if you want to enable support for tagging frames for Mediatek switches. +config NET_DSA_TAG_MXL_GSW1XX + tristate "Tag driver for MaxLinear GSW1xx switches" + help + The GSW1xx family of switches supports an 8-byte special tag which + can be used on the CPU port of the switch. + Say Y or M if you want to enable support for tagging frames for + MaxLinear GSW1xx switches. 
+ config NET_DSA_TAG_KSZ tristate "Tag driver for Microchip 8795/937x/9477/9893 families of switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 4b011a1d5c87e..42d173f5a7013 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +obj-$(CONFIG_NET_DSA_TAG_MXL_GSW1XX) += tag_mxl-gsw1xx.o obj-$(CONFIG_NET_DSA_TAG_NONE) += tag_none.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o diff --git a/net/dsa/tag_mxl-gsw1xx.c b/net/dsa/tag_mxl-gsw1xx.c new file mode 100644 index 0000000000000..701a079955f2e --- /dev/null +++ b/net/dsa/tag_mxl-gsw1xx.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * DSA driver Special Tag support for MaxLinear GSW1xx switch chips + * + * Copyright (C) 2025 Daniel Golle + * Copyright (C) 2023 - 2024 MaxLinear Inc. + */ + +#include +#include +#include +#include + +#include "tag.h" + +/* To define the outgoing port and to discover the incoming port a special + * tag is used by the GSW1xx. + * + * Dest MAC Src MAC special TAG EtherType + * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 5 6 7 8 | 1 2 |... 
+ * |<--------------->| + */ + +#define GSW1XX_TAG_NAME "gsw1xx" + +/* special tag header length (RX and TX) */ +#define GSW1XX_HEADER_LEN 8 + +/* Word 0 = Ethertype -> 0x88C3 */ + +/* Word 1 */ +#define GSW1XX_TX_PORT_MAP GENMASK(7, 0) +#define GSW1XX_TX_PORT_MAP_EN BIT(15) +#define GSW1XX_TX_CLASS_EN BIT(14) +#define GSW1XX_TX_TIME_STAMP_EN BIT(13) +#define GSW1XX_TX_LRN_DIS BIT(12) +#define GSW1XX_TX_CLASS GENMASK(11, 8) + +/* special tag in RX path header */ +/* Word 2 */ +#define GSW1XX_RX_PORT_MAP GENMASK(15, 8) + +static struct sk_buff *gsw1xx_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + __be16 *gsw1xx_tag; + + /* provide additional space 'GSW1XX_HEADER_LEN' bytes */ + skb_push(skb, GSW1XX_HEADER_LEN); + + /* add space between MAC address and Ethertype */ + dsa_alloc_etype_header(skb, GSW1XX_HEADER_LEN); + + /* special tag ingress */ + gsw1xx_tag = dsa_etype_header_pos_tx(skb); + gsw1xx_tag[0] = htons(ETH_P_MXLGSW); + gsw1xx_tag[1] = htons(GSW1XX_TX_PORT_MAP_EN | GSW1XX_TX_LRN_DIS | + FIELD_PREP(GSW1XX_TX_PORT_MAP, BIT(dp->index))); + + gsw1xx_tag[2] = 0; + gsw1xx_tag[3] = 0; + + return skb; +} + +static struct sk_buff *gsw1xx_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + int port; + __be16 *gsw1xx_tag; + + if (unlikely(!pskb_may_pull(skb, GSW1XX_HEADER_LEN))) { + dev_warn_ratelimited(&dev->dev, "Dropping packet, cannot pull SKB\n"); + return NULL; + } + + gsw1xx_tag = dsa_etype_header_pos_rx(skb); + + if (unlikely(ntohs(gsw1xx_tag[0]) != ETH_P_MXLGSW)) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid special tag\n"); + dev_warn_ratelimited(&dev->dev, "Tag: %8ph\n", gsw1xx_tag); + return NULL; + } + + /* Get source port information */ + port = FIELD_GET(GSW1XX_RX_PORT_MAP, ntohs(gsw1xx_tag[1])); + skb->dev = dsa_conduit_find_user(dev, 0, port); + if (!skb->dev) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); + 
dev_warn_ratelimited(&dev->dev, "Tag: %8ph\n", gsw1xx_tag); + return NULL; + } + + /* remove the GSW1xx special tag between MAC addresses and the current + * ethertype field. + */ + skb_pull_rcsum(skb, GSW1XX_HEADER_LEN); + dsa_strip_etype_header(skb, GSW1XX_HEADER_LEN); + + return skb; +} + +static const struct dsa_device_ops gsw1xx_netdev_ops = { + .name = GSW1XX_TAG_NAME, + .proto = DSA_TAG_PROTO_MXL_GSW1XX, + .xmit = gsw1xx_tag_xmit, + .rcv = gsw1xx_tag_rcv, + .needed_headroom = GSW1XX_HEADER_LEN, +}; + +MODULE_DESCRIPTION("DSA tag driver for MaxLinear GSW1xx 8 byte protocol"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MXL_GSW1XX, GSW1XX_TAG_NAME); + +module_dsa_tag_driver(gsw1xx_netdev_ops); From 22335939ec907cca26be41f10f6cc01f0df8b0e9 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:20:28 +0000 Subject: [PATCH 625/867] net: dsa: add driver for MaxLinear GSW1xx switch family Add driver for the MaxLinear GSW1xx family of Ethernet switch ICs which are based on the same IP as the Lantiq/Intel GSWIP found in the Lantiq VR9 and Intel GRX MIPS router SoCs. The main difference is that instead of using memory-mapped I/O to communicate with the host CPU these ICs are connected via MDIO (or SPI, which isn't supported by this driver). Implement the regmap API to access the switch registers over MDIO to allow reusing lantiq_gswip_common for all core functionality. The GSW1xx also comes with a SerDes port capable of 1000Base-X, SGMII and 2500Base-X, which can either be used to connect an external PHY or SFP cage, or as the CPU port. Support for the SerDes interface is implemented in this driver using the phylink_pcs interface. 
Signed-off-by: Daniel Golle Tested-by: Alexander Sverdlin Link: https://patch.msgid.link/b567ec1b4beb08fd37abf18b280c56d5d8253c26.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq/Kconfig | 12 + drivers/net/dsa/lantiq/Makefile | 1 + drivers/net/dsa/lantiq/lantiq_gswip.h | 1 + drivers/net/dsa/lantiq/mxl-gsw1xx.c | 733 ++++++++++++++++++++++++ drivers/net/dsa/lantiq/mxl-gsw1xx.h | 126 ++++ drivers/net/dsa/lantiq/mxl-gsw1xx_pce.h | 154 +++++ 6 files changed, 1027 insertions(+) create mode 100644 drivers/net/dsa/lantiq/mxl-gsw1xx.c create mode 100644 drivers/net/dsa/lantiq/mxl-gsw1xx.h create mode 100644 drivers/net/dsa/lantiq/mxl-gsw1xx_pce.h diff --git a/drivers/net/dsa/lantiq/Kconfig b/drivers/net/dsa/lantiq/Kconfig index 78db82a47d092..4a9771be5d588 100644 --- a/drivers/net/dsa/lantiq/Kconfig +++ b/drivers/net/dsa/lantiq/Kconfig @@ -10,3 +10,15 @@ config NET_DSA_LANTIQ_GSWIP help This enables support for the Lantiq / Intel GSWIP 2.1 found in the xrx200 / VR9 SoC. 
+ +config NET_DSA_MXL_GSW1XX + tristate "MaxLinear GSW1xx Ethernet switch support" + select NET_DSA_TAG_MXL_GSW1XX + select NET_DSA_LANTIQ_COMMON + help + This enables support for the MaxLinear GSW1xx family of 1GE switches + GSW120 4 port, 2 PHYs, RGMII & SGMII/2500Base-X + GSW125 4 port, 2 PHYs, RGMII & SGMII/2500Base-X, industrial temperature + GSW140 6 port, 4 PHYs, RGMII & SGMII/2500Base-X + GSW141 6 port, 4 PHYs, RGMII & SGMII + GSW145 6 port, 4 PHYs, RGMII & SGMII/2500Base-X, industrial temperature diff --git a/drivers/net/dsa/lantiq/Makefile b/drivers/net/dsa/lantiq/Makefile index 65ffa7bb71aa7..85fce605310b3 100644 --- a/drivers/net/dsa/lantiq/Makefile +++ b/drivers/net/dsa/lantiq/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o obj-$(CONFIG_NET_DSA_LANTIQ_COMMON) += lantiq_gswip_common.o +obj-$(CONFIG_NET_DSA_MXL_GSW1XX) += mxl-gsw1xx.o diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 0c32ec85e1272..9c38e51a75e80 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -255,6 +255,7 @@ struct gswip_hw_info { unsigned int allowed_cpu_ports; unsigned int mii_ports; int mii_port_reg_offset; + bool supports_2500m; const struct gswip_pce_microcode (*pce_microcode)[]; size_t pce_microcode_size; enum dsa_tag_protocol tag_protocol; diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c new file mode 100644 index 0000000000000..0816c61a47f12 --- /dev/null +++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* DSA Driver for MaxLinear GSW1xx switch devices + * + * Copyright (C) 2025 Daniel Golle + * Copyright (C) 2023 - 2024 MaxLinear Inc. + * Copyright (C) 2022 Snap One, LLC. All rights reserved. 
+ * Copyright (C) 2017 - 2019 Hauke Mehrtens + * Copyright (C) 2012 John Crispin + * Copyright (C) 2010 Lantiq Deutschland + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "lantiq_gswip.h" +#include "mxl-gsw1xx.h" +#include "mxl-gsw1xx_pce.h" + +struct gsw1xx_priv { + struct mdio_device *mdio_dev; + int smdio_badr; + struct regmap *sgmii; + struct regmap *gpio; + struct regmap *clk; + struct regmap *shell; + struct phylink_pcs pcs; + phy_interface_t tbi_interface; + struct gswip_priv gswip; +}; + +static int gsw1xx_config_smdio_badr(struct gsw1xx_priv *priv, + unsigned int reg) +{ + struct mii_bus *bus = priv->mdio_dev->bus; + int sw_addr = priv->mdio_dev->addr; + int smdio_badr = priv->smdio_badr; + int res; + + if (smdio_badr == GSW1XX_SMDIO_BADR_UNKNOWN || + reg - smdio_badr >= GSW1XX_SMDIO_BADR || + smdio_badr > reg) { + /* Configure the Switch Base Address */ + smdio_badr = reg & ~GENMASK(3, 0); + res = __mdiobus_write(bus, sw_addr, GSW1XX_SMDIO_BADR, smdio_badr); + if (res < 0) { + dev_err(&priv->mdio_dev->dev, + "%s: Error %d, configuring switch base\n", + __func__, res); + return res; + } + priv->smdio_badr = smdio_badr; + } + + return smdio_badr; +} + +static int gsw1xx_regmap_read(void *context, unsigned int reg, + unsigned int *val) +{ + struct gsw1xx_priv *priv = context; + struct mii_bus *bus = priv->mdio_dev->bus; + int sw_addr = priv->mdio_dev->addr; + int smdio_badr; + int res; + + smdio_badr = gsw1xx_config_smdio_badr(priv, reg); + if (smdio_badr < 0) + return smdio_badr; + + res = __mdiobus_read(bus, sw_addr, reg - smdio_badr); + if (res < 0) { + dev_err(&priv->mdio_dev->dev, "%s: Error %d reading 0x%x\n", + __func__, res, reg); + return res; + } + + *val = res; + + return 0; +} + +static int gsw1xx_regmap_write(void *context, unsigned int reg, + unsigned int val) +{ + struct gsw1xx_priv *priv = context; + struct mii_bus *bus = priv->mdio_dev->bus; + int sw_addr = priv->mdio_dev->addr; + int smdio_badr; + 
int res; + + smdio_badr = gsw1xx_config_smdio_badr(priv, reg); + if (smdio_badr < 0) + return smdio_badr; + + res = __mdiobus_write(bus, sw_addr, reg - smdio_badr, val); + if (res < 0) + dev_err(&priv->mdio_dev->dev, + "%s: Error %d, writing 0x%x:0x%x\n", __func__, res, reg, + val); + + return res; +} + +static const struct regmap_bus gsw1xx_regmap_bus = { + .reg_write = gsw1xx_regmap_write, + .reg_read = gsw1xx_regmap_read, +}; + +static void gsw1xx_mdio_regmap_lock(void *mdio_lock) +{ + mutex_lock_nested(mdio_lock, MDIO_MUTEX_NESTED); +} + +static void gsw1xx_mdio_regmap_unlock(void *mdio_lock) +{ + mutex_unlock(mdio_lock); +} + +static unsigned int gsw1xx_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; +} + +static struct gsw1xx_priv *pcs_to_gsw1xx(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct gsw1xx_priv, pcs); +} + +static int gsw1xx_pcs_enable(struct phylink_pcs *pcs) +{ + struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + + /* Deassert SGMII shell reset */ + return regmap_clear_bits(priv->shell, GSW1XX_SHELL_RST_REQ, + GSW1XX_RST_REQ_SGMII_SHELL); +} + +static void gsw1xx_pcs_disable(struct phylink_pcs *pcs) +{ + struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + + /* Assert SGMII shell reset */ + regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ, + GSW1XX_RST_REQ_SGMII_SHELL); + + priv->tbi_interface = PHY_INTERFACE_MODE_NA; +} + +static void gsw1xx_pcs_get_state(struct phylink_pcs *pcs, + unsigned int neg_mode, + struct phylink_link_state *state) +{ + struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + int ret; + u32 val; + + ret = regmap_read(priv->sgmii, GSW1XX_SGMII_TBI_TBISTAT, &val); + if (ret < 0) + return; + + state->link = !!(val & GSW1XX_SGMII_TBI_TBISTAT_LINK); + state->an_complete = !!(val & GSW1XX_SGMII_TBI_TBISTAT_AN_COMPLETE); + + ret = regmap_read(priv->sgmii, GSW1XX_SGMII_TBI_LPSTAT, &val); + if (ret < 0) + return; + + state->duplex = (val & 
GSW1XX_SGMII_TBI_LPSTAT_DUPLEX) ? + DUPLEX_FULL : DUPLEX_HALF; + if (val & GSW1XX_SGMII_TBI_LPSTAT_PAUSE_RX) + state->pause |= MLO_PAUSE_RX; + + if (val & GSW1XX_SGMII_TBI_LPSTAT_PAUSE_TX) + state->pause |= MLO_PAUSE_TX; + + switch (FIELD_GET(GSW1XX_SGMII_TBI_LPSTAT_SPEED, val)) { + case GSW1XX_SGMII_TBI_LPSTAT_SPEED_10: + state->speed = SPEED_10; + break; + case GSW1XX_SGMII_TBI_LPSTAT_SPEED_100: + state->speed = SPEED_100; + break; + case GSW1XX_SGMII_TBI_LPSTAT_SPEED_1000: + state->speed = SPEED_1000; + break; + case GSW1XX_SGMII_TBI_LPSTAT_SPEED_NOSGMII: + if (state->interface == PHY_INTERFACE_MODE_1000BASEX) + state->speed = SPEED_1000; + else if (state->interface == PHY_INTERFACE_MODE_2500BASEX) + state->speed = SPEED_2500; + else + state->speed = SPEED_UNKNOWN; + break; + } +} + +static int gsw1xx_pcs_phy_xaui_write(struct gsw1xx_priv *priv, u16 addr, + u16 data) +{ + int ret, val; + + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_D, data); + if (ret < 0) + return ret; + + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_A, addr); + if (ret < 0) + return ret; + + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_C, + GSW1XX_SGMII_PHY_WRITE | + GSW1XX_SGMII_PHY_RESET_N); + if (ret < 0) + return ret; + + return regmap_read_poll_timeout(priv->sgmii, GSW1XX_SGMII_PHY_C, + val, val & GSW1XX_SGMII_PHY_STATUS, + 1000, 100000); +} + +static int gsw1xx_pcs_reset(struct gsw1xx_priv *priv) +{ + int ret; + u16 val; + + /* Assert and deassert SGMII shell reset */ + ret = regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ, + GSW1XX_RST_REQ_SGMII_SHELL); + if (ret < 0) + return ret; + + ret = regmap_clear_bits(priv->shell, GSW1XX_SHELL_RST_REQ, + GSW1XX_RST_REQ_SGMII_SHELL); + if (ret < 0) + return ret; + + /* Hardware Bringup FSM Enable */ + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_HWBU_CTRL, + GSW1XX_SGMII_PHY_HWBU_CTRL_EN_HWBU_FSM | + GSW1XX_SGMII_PHY_HWBU_CTRL_HW_FSM_EN); + if (ret < 0) + return ret; + + /* Configure SGMII PHY Receiver */ + val = 
FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_EQ, + GSW1XX_SGMII_PHY_RX0_CFG2_EQ_DEF) | + GSW1XX_SGMII_PHY_RX0_CFG2_LOS_EN | + GSW1XX_SGMII_PHY_RX0_CFG2_TERM_EN | + FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT, + GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT_DEF); + + /* TODO: Take care of inverted RX pair once generic property is + * available + */ + + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_RX0_CFG2, val); + if (ret < 0) + return ret; + + val = FIELD_PREP(GSW1XX_SGMII_PHY_TX0_CFG3_VBOOST_LEVEL, + GSW1XX_SGMII_PHY_TX0_CFG3_VBOOST_LEVEL_DEF); + + /* TODO: Take care of inverted TX pair once generic property is + * available + */ + + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_TX0_CFG3, val); + if (ret < 0) + return ret; + + /* Reset and Release TBI */ + val = GSW1XX_SGMII_TBI_TBICTL_INITTBI | GSW1XX_SGMII_TBI_TBICTL_ENTBI | + GSW1XX_SGMII_TBI_TBICTL_CRSTRR | GSW1XX_SGMII_TBI_TBICTL_CRSOFF; + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TBICTL, val); + if (ret < 0) + return ret; + val &= ~GSW1XX_SGMII_TBI_TBICTL_INITTBI; + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TBICTL, val); + if (ret < 0) + return ret; + + /* Release Tx Data Buffers */ + ret = regmap_set_bits(priv->sgmii, GSW1XX_SGMII_PCS_TXB_CTL, + GSW1XX_SGMII_PCS_TXB_CTL_INIT_TX_TXB); + if (ret < 0) + return ret; + ret = regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_PCS_TXB_CTL, + GSW1XX_SGMII_PCS_TXB_CTL_INIT_TX_TXB); + if (ret < 0) + return ret; + + /* Release Rx Data Buffers */ + ret = regmap_set_bits(priv->sgmii, GSW1XX_SGMII_PCS_RXB_CTL, + GSW1XX_SGMII_PCS_RXB_CTL_INIT_RX_RXB); + if (ret < 0) + return ret; + return regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_PCS_RXB_CTL, + GSW1XX_SGMII_PCS_RXB_CTL_INIT_RX_RXB); +} + +static int gsw1xx_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + u16 txaneg, anegctl, nco_ctrl; + bool reconf = false; + int ret = 
0; + + /* do not unnecessarily disrupt link and skip resetting the hardware in + * case the PCS has previously been successfully configured for this + * interface mode + */ + if (priv->tbi_interface == interface) + reconf = true; + + /* mark PCS configuration as incomplete */ + priv->tbi_interface = PHY_INTERFACE_MODE_NA; + + if (!reconf) + ret = gsw1xx_pcs_reset(priv); + + if (ret) + return ret; + + /* override bootstrap pin settings + * OVRANEG sets ANEG Mode, Enable ANEG and restart ANEG to be + * taken from bits ANMODE, ANEGEN, RANEG of the ANEGCTL register. + * OVERABL sets ability bits in tx_config_reg to be taken from + * the TXANEGH and TXANEGL registers. + */ + anegctl = GSW1XX_SGMII_TBI_ANEGCTL_OVRANEG | + GSW1XX_SGMII_TBI_ANEGCTL_OVRABL; + + switch (phylink_get_link_timer_ns(interface)) { + case 10000: + anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT, + GSW1XX_SGMII_TBI_ANEGCTL_LT_10US); + break; + case 1600000: + anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT, + GSW1XX_SGMII_TBI_ANEGCTL_LT_1_6MS); + break; + case 5000000: + anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT, + GSW1XX_SGMII_TBI_ANEGCTL_LT_5MS); + break; + case 10000000: + anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT, + GSW1XX_SGMII_TBI_ANEGCTL_LT_10MS); + break; + default: + return -EINVAL; + } + + if (neg_mode & PHYLINK_PCS_NEG_INBAND) + anegctl |= GSW1XX_SGMII_TBI_ANEGCTL_ANEGEN; + + txaneg = phylink_mii_c22_pcs_encode_advertisement(interface, advertising); + + if (interface == PHY_INTERFACE_MODE_SGMII) { + /* lacking a defined reverse-SGMII interface mode this + * driver only supports SGMII (MAC side) for now + */ + anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_ANMODE, + GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_SGMII_MAC); + txaneg |= ADVERTISE_LPACK; + } else if (interface == PHY_INTERFACE_MODE_1000BASEX || + interface == PHY_INTERFACE_MODE_2500BASEX) { + anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_ANMODE, + GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_1000BASEX); + } else { + 
dev_err(priv->gswip.dev, "%s: wrong interface mode %s\n", + __func__, phy_modes(interface)); + return -EINVAL; + } + + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TXANEGH, + FIELD_GET(GENMASK(15, 8), txaneg)); + if (ret < 0) + return ret; + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TXANEGL, + FIELD_GET(GENMASK(7, 0), txaneg)); + if (ret < 0) + return ret; + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, anegctl); + if (ret < 0) + return ret; + + if (!reconf) { + /* setup SerDes clock speed */ + if (interface == PHY_INTERFACE_MODE_2500BASEX) + nco_ctrl = GSW1XX_SGMII_2G5 | GSW1XX_SGMII_2G5_NCO2; + else + nco_ctrl = GSW1XX_SGMII_1G | GSW1XX_SGMII_1G_NCO1; + + ret = regmap_update_bits(priv->clk, GSW1XX_CLK_NCO_CTRL, + GSW1XX_SGMII_HSP_MASK | + GSW1XX_SGMII_SEL, + nco_ctrl); + if (ret) + return ret; + + ret = gsw1xx_pcs_phy_xaui_write(priv, 0x30, 0x80); + if (ret) + return ret; + } + + /* PCS configuration has now been completed, store mode to prevent + * disrupting the link in case of future calls of this function for the + * same interface mode. 
+ */ + priv->tbi_interface = interface; + + return 0; +} + +static void gsw1xx_pcs_an_restart(struct phylink_pcs *pcs) +{ + struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + + regmap_set_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, + GSW1XX_SGMII_TBI_ANEGCTL_RANEG); +} + +static void gsw1xx_pcs_link_up(struct phylink_pcs *pcs, + unsigned int neg_mode, + phy_interface_t interface, int speed, + int duplex) +{ + struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + u16 lpstat; + + /* When in-band AN is enabled hardware will set lpstat */ + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) + return; + + /* Force speed and duplex settings */ + if (interface == PHY_INTERFACE_MODE_SGMII) { + if (speed == SPEED_10) + lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED, + GSW1XX_SGMII_TBI_LPSTAT_SPEED_10); + else if (speed == SPEED_100) + lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED, + GSW1XX_SGMII_TBI_LPSTAT_SPEED_100); + else + lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED, + GSW1XX_SGMII_TBI_LPSTAT_SPEED_1000); + } else { + lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED, + GSW1XX_SGMII_TBI_LPSTAT_SPEED_NOSGMII); + } + + if (duplex == DUPLEX_FULL) + lpstat |= GSW1XX_SGMII_TBI_LPSTAT_DUPLEX; + + regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_LPSTAT, lpstat); +} + +static const struct phylink_pcs_ops gsw1xx_pcs_ops = { + .pcs_inband_caps = gsw1xx_pcs_inband_caps, + .pcs_enable = gsw1xx_pcs_enable, + .pcs_disable = gsw1xx_pcs_disable, + .pcs_get_state = gsw1xx_pcs_get_state, + .pcs_config = gsw1xx_pcs_config, + .pcs_an_restart = gsw1xx_pcs_an_restart, + .pcs_link_up = gsw1xx_pcs_link_up, +}; + +static void gsw1xx_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) +{ + struct gswip_priv *priv = ds->priv; + + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000; + + switch (port) { + case 0: + case 1: + case 2: + case 3: + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + break; + case 4: 
/* port 4: SGMII */ + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + config->supported_interfaces); + if (priv->hw_info->supports_2500m) { + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + config->supported_interfaces); + config->mac_capabilities |= MAC_2500FD; + } + return; /* no support for EEE on SGMII port */ + case 5: /* port 5: RGMII or RMII */ + __set_bit(PHY_INTERFACE_MODE_RMII, + config->supported_interfaces); + phy_interface_set_rgmii(config->supported_interfaces); + break; + } + + config->lpi_capabilities = MAC_100FD | MAC_1000FD; + config->lpi_timer_default = 20; + memcpy(config->lpi_interfaces, config->supported_interfaces, + sizeof(config->lpi_interfaces)); +} + +static struct phylink_pcs *gsw1xx_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct gswip_priv *gswip_priv = dp->ds->priv; + struct gsw1xx_priv *gsw1xx_priv = container_of(gswip_priv, + struct gsw1xx_priv, + gswip); + + switch (dp->index) { + case GSW1XX_SGMII_PORT: + return &gsw1xx_priv->pcs; + default: + return NULL; + } +} + +static struct regmap *gsw1xx_regmap_init(struct gsw1xx_priv *priv, + const char *name, + unsigned int reg_base, + unsigned int max_register) +{ + const struct regmap_config config = { + .name = name, + .reg_bits = 16, + .val_bits = 16, + .reg_base = reg_base, + .max_register = max_register, + .lock = gsw1xx_mdio_regmap_lock, + .unlock = gsw1xx_mdio_regmap_unlock, + .lock_arg = &priv->mdio_dev->bus->mdio_lock, + }; + + return devm_regmap_init(&priv->mdio_dev->dev, &gsw1xx_regmap_bus, + priv, &config); +} + +static int gsw1xx_probe(struct mdio_device *mdiodev) +{ + struct device *dev = &mdiodev->dev; + struct gsw1xx_priv *priv; + u32 version; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->mdio_dev = mdiodev; + priv->smdio_badr = 
GSW1XX_SMDIO_BADR_UNKNOWN; + + priv->gswip.dev = dev; + priv->gswip.hw_info = of_device_get_match_data(dev); + if (!priv->gswip.hw_info) + return -EINVAL; + + priv->gswip.gswip = gsw1xx_regmap_init(priv, "switch", + GSW1XX_SWITCH_BASE, 0xfff); + if (IS_ERR(priv->gswip.gswip)) + return PTR_ERR(priv->gswip.gswip); + + priv->gswip.mdio = gsw1xx_regmap_init(priv, "mdio", GSW1XX_MMDIO_BASE, + 0xff); + if (IS_ERR(priv->gswip.mdio)) + return PTR_ERR(priv->gswip.mdio); + + priv->gswip.mii = gsw1xx_regmap_init(priv, "mii", GSW1XX_RGMII_BASE, + 0xff); + if (IS_ERR(priv->gswip.mii)) + return PTR_ERR(priv->gswip.mii); + + priv->sgmii = gsw1xx_regmap_init(priv, "sgmii", GSW1XX_SGMII_BASE, + 0xfff); + if (IS_ERR(priv->sgmii)) + return PTR_ERR(priv->sgmii); + + priv->gpio = gsw1xx_regmap_init(priv, "gpio", GSW1XX_GPIO_BASE, 0xff); + if (IS_ERR(priv->gpio)) + return PTR_ERR(priv->gpio); + + priv->clk = gsw1xx_regmap_init(priv, "clk", GSW1XX_CLK_BASE, 0xff); + if (IS_ERR(priv->clk)) + return PTR_ERR(priv->clk); + + priv->shell = gsw1xx_regmap_init(priv, "shell", GSW1XX_SHELL_BASE, + 0xff); + if (IS_ERR(priv->shell)) + return PTR_ERR(priv->shell); + + priv->pcs.ops = &gsw1xx_pcs_ops; + priv->pcs.poll = true; + __set_bit(PHY_INTERFACE_MODE_SGMII, + priv->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + priv->pcs.supported_interfaces); + if (priv->gswip.hw_info->supports_2500m) + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + priv->pcs.supported_interfaces); + priv->tbi_interface = PHY_INTERFACE_MODE_NA; + + /* assert SGMII reset to power down SGMII unit */ + ret = regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ, + GSW1XX_RST_REQ_SGMII_SHELL); + if (ret < 0) + return ret; + + /* configure GPIO pin-mux for MMDIO in case of external PHY connected to + * SGMII or RGMII as slave interface + */ + regmap_set_bits(priv->gpio, GPIO_ALTSEL0, 3); + regmap_set_bits(priv->gpio, GPIO_ALTSEL1, 3); + + ret = regmap_read(priv->gswip.gswip, GSWIP_VERSION, &version); + if (ret) + 
return ret; + + ret = gswip_probe_common(&priv->gswip, version); + if (ret) + return ret; + + dev_set_drvdata(dev, &priv->gswip); + + return 0; +} + +static void gsw1xx_remove(struct mdio_device *mdiodev) +{ + struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + gswip_disable_switch(priv); + + dsa_unregister_switch(priv->ds); +} + +static void gsw1xx_shutdown(struct mdio_device *mdiodev) +{ + struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dev_set_drvdata(&mdiodev->dev, NULL); + + gswip_disable_switch(priv); +} + +static const struct gswip_hw_info gsw12x_data = { + .max_ports = GSW1XX_PORTS, + .allowed_cpu_ports = BIT(GSW1XX_MII_PORT) | BIT(GSW1XX_SGMII_PORT), + .mii_ports = BIT(GSW1XX_MII_PORT), + .mii_port_reg_offset = -GSW1XX_MII_PORT, + .mac_select_pcs = gsw1xx_phylink_mac_select_pcs, + .phylink_get_caps = &gsw1xx_phylink_get_caps, + .supports_2500m = true, + .pce_microcode = &gsw1xx_pce_microcode, + .pce_microcode_size = ARRAY_SIZE(gsw1xx_pce_microcode), + .tag_protocol = DSA_TAG_PROTO_MXL_GSW1XX, +}; + +static const struct gswip_hw_info gsw140_data = { + .max_ports = GSW1XX_PORTS, + .allowed_cpu_ports = BIT(GSW1XX_MII_PORT) | BIT(GSW1XX_SGMII_PORT), + .mii_ports = BIT(GSW1XX_MII_PORT), + .mii_port_reg_offset = -GSW1XX_MII_PORT, + .mac_select_pcs = gsw1xx_phylink_mac_select_pcs, + .phylink_get_caps = &gsw1xx_phylink_get_caps, + .supports_2500m = true, + .pce_microcode = &gsw1xx_pce_microcode, + .pce_microcode_size = ARRAY_SIZE(gsw1xx_pce_microcode), + .tag_protocol = DSA_TAG_PROTO_MXL_GSW1XX, +}; + +static const struct gswip_hw_info gsw141_data = { + .max_ports = GSW1XX_PORTS, + .allowed_cpu_ports = BIT(GSW1XX_MII_PORT) | BIT(GSW1XX_SGMII_PORT), + .mii_ports = BIT(GSW1XX_MII_PORT), + .mii_port_reg_offset = -GSW1XX_MII_PORT, + .mac_select_pcs = gsw1xx_phylink_mac_select_pcs, + .phylink_get_caps = gsw1xx_phylink_get_caps, + .pce_microcode = &gsw1xx_pce_microcode, + .pce_microcode_size = 
ARRAY_SIZE(gsw1xx_pce_microcode), + .tag_protocol = DSA_TAG_PROTO_MXL_GSW1XX, +}; + +/* + * GSW125 is the industrial temperature version of GSW120. + * GSW145 is the industrial temperature version of GSW140. + */ +static const struct of_device_id gsw1xx_of_match[] = { + { .compatible = "maxlinear,gsw120", .data = &gsw12x_data }, + { .compatible = "maxlinear,gsw125", .data = &gsw12x_data }, + { .compatible = "maxlinear,gsw140", .data = &gsw140_data }, + { .compatible = "maxlinear,gsw141", .data = &gsw141_data }, + { .compatible = "maxlinear,gsw145", .data = &gsw140_data }, + { /* sentinel */ }, +}; + +MODULE_DEVICE_TABLE(of, gsw1xx_of_match); + +static struct mdio_driver gsw1xx_driver = { + .probe = gsw1xx_probe, + .remove = gsw1xx_remove, + .shutdown = gsw1xx_shutdown, + .mdiodrv.driver = { + .name = "mxl-gsw1xx", + .of_match_table = gsw1xx_of_match, + }, +}; + +mdio_module_driver(gsw1xx_driver); + +MODULE_AUTHOR("Daniel Golle "); +MODULE_DESCRIPTION("Driver for MaxLinear GSW1xx ethernet switch"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.h b/drivers/net/dsa/lantiq/mxl-gsw1xx.h new file mode 100644 index 0000000000000..38e03c048a26c --- /dev/null +++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Register definitions for MaxLinear GSW1xx series switches + * + * Copyright (C) 2025 Daniel Golle + * Copyright (C) 2023 - 2024 MaxLinear Inc. 
+ */ +#ifndef __MXL_GSW1XX_H +#define __MXL_GSW1XX_H + +#include + +#define GSW1XX_PORTS 6 +/* Port used for RGMII or optional RMII */ +#define GSW1XX_MII_PORT 5 +/* Port used for SGMII */ +#define GSW1XX_SGMII_PORT 4 + +#define GSW1XX_SYS_CLK_FREQ 340000000 + +/* SMDIO switch register base address */ +#define GSW1XX_SMDIO_BADR 0x1f +#define GSW1XX_SMDIO_BADR_UNKNOWN -1 + +/* GSW1XX SGMII PCS */ +#define GSW1XX_SGMII_BASE 0xd000 +#define GSW1XX_SGMII_PHY_HWBU_CTRL 0x009 +#define GSW1XX_SGMII_PHY_HWBU_CTRL_EN_HWBU_FSM BIT(0) +#define GSW1XX_SGMII_PHY_HWBU_CTRL_HW_FSM_EN BIT(3) +#define GSW1XX_SGMII_TBI_TXANEGH 0x300 +#define GSW1XX_SGMII_TBI_TXANEGL 0x301 +#define GSW1XX_SGMII_TBI_ANEGCTL 0x304 +#define GSW1XX_SGMII_TBI_ANEGCTL_LT GENMASK(1, 0) +#define GSW1XX_SGMII_TBI_ANEGCTL_LT_10US 0 +#define GSW1XX_SGMII_TBI_ANEGCTL_LT_1_6MS 1 +#define GSW1XX_SGMII_TBI_ANEGCTL_LT_5MS 2 +#define GSW1XX_SGMII_TBI_ANEGCTL_LT_10MS 3 +#define GSW1XX_SGMII_TBI_ANEGCTL_ANEGEN BIT(2) +#define GSW1XX_SGMII_TBI_ANEGCTL_RANEG BIT(3) +#define GSW1XX_SGMII_TBI_ANEGCTL_OVRABL BIT(4) +#define GSW1XX_SGMII_TBI_ANEGCTL_OVRANEG BIT(5) +#define GSW1XX_SGMII_TBI_ANEGCTL_ANMODE GENMASK(7, 6) +#define GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_1000BASEX 1 +#define GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_SGMII_PHY 2 +#define GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_SGMII_MAC 3 +#define GSW1XX_SGMII_TBI_ANEGCTL_BCOMP BIT(15) + +#define GSW1XX_SGMII_TBI_TBICTL 0x305 +#define GSW1XX_SGMII_TBI_TBICTL_INITTBI BIT(0) +#define GSW1XX_SGMII_TBI_TBICTL_ENTBI BIT(1) +#define GSW1XX_SGMII_TBI_TBICTL_CRSTRR BIT(4) +#define GSW1XX_SGMII_TBI_TBICTL_CRSOFF BIT(5) +#define GSW1XX_SGMII_TBI_TBISTAT 0x309 +#define GSW1XX_SGMII_TBI_TBISTAT_LINK BIT(0) +#define GSW1XX_SGMII_TBI_TBISTAT_AN_COMPLETE BIT(1) +#define GSW1XX_SGMII_TBI_LPSTAT 0x30a +#define GSW1XX_SGMII_TBI_LPSTAT_DUPLEX BIT(0) +#define GSW1XX_SGMII_TBI_LPSTAT_PAUSE_RX BIT(1) +#define GSW1XX_SGMII_TBI_LPSTAT_PAUSE_TX BIT(2) +#define GSW1XX_SGMII_TBI_LPSTAT_SPEED GENMASK(6, 5) +#define 
GSW1XX_SGMII_TBI_LPSTAT_SPEED_10 0 +#define GSW1XX_SGMII_TBI_LPSTAT_SPEED_100 1 +#define GSW1XX_SGMII_TBI_LPSTAT_SPEED_1000 2 +#define GSW1XX_SGMII_TBI_LPSTAT_SPEED_NOSGMII 3 +#define GSW1XX_SGMII_PHY_D 0x100 +#define GSW1XX_SGMII_PHY_A 0x101 +#define GSW1XX_SGMII_PHY_C 0x102 +#define GSW1XX_SGMII_PHY_STATUS BIT(0) +#define GSW1XX_SGMII_PHY_READ BIT(4) +#define GSW1XX_SGMII_PHY_WRITE BIT(8) +#define GSW1XX_SGMII_PHY_RESET_N BIT(12) +#define GSW1XX_SGMII_PCS_RXB_CTL 0x401 +#define GSW1XX_SGMII_PCS_RXB_CTL_INIT_RX_RXB BIT(1) +#define GSW1XX_SGMII_PCS_TXB_CTL 0x404 +#define GSW1XX_SGMII_PCS_TXB_CTL_INIT_TX_TXB BIT(1) + +#define GSW1XX_SGMII_PHY_RX0_CFG2 0x004 +#define GSW1XX_SGMII_PHY_RX0_CFG2_EQ GENMASK(2, 0) +#define GSW1XX_SGMII_PHY_RX0_CFG2_EQ_DEF 2 +#define GSW1XX_SGMII_PHY_RX0_CFG2_INVERT BIT(3) +#define GSW1XX_SGMII_PHY_RX0_CFG2_LOS_EN BIT(4) +#define GSW1XX_SGMII_PHY_RX0_CFG2_TERM_EN BIT(5) +#define GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT GENMASK(12, 6) +#define GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT_DEF 20 + +#define GSW1XX_SGMII_PHY_TX0_CFG3 0x007 +#define GSW1XX_SGMII_PHY_TX0_CFG3_VBOOST_EN BIT(12) +#define GSW1XX_SGMII_PHY_TX0_CFG3_VBOOST_LEVEL GENMASK(11, 9) +#define GSW1XX_SGMII_PHY_TX0_CFG3_VBOOST_LEVEL_DEF 4 +#define GSW1XX_SGMII_PHY_TX0_CFG3_INVERT BIT(8) + +/* GSW1XX PDI Registers */ +#define GSW1XX_SWITCH_BASE 0xe000 + +/* GSW1XX MII Registers */ +#define GSW1XX_RGMII_BASE 0xf100 + +/* GSW1XX GPIO Registers */ +#define GSW1XX_GPIO_BASE 0xf300 +#define GPIO_ALTSEL0 0x83 +#define GPIO_ALTSEL0_EXTPHY_MUX_VAL 0x03c3 +#define GPIO_ALTSEL1 0x84 +#define GPIO_ALTSEL1_EXTPHY_MUX_VAL 0x003f + +/* MDIO bus controller */ +#define GSW1XX_MMDIO_BASE 0xf400 + +/* generic IC registers */ +#define GSW1XX_SHELL_BASE 0xfa00 +#define GSW1XX_SHELL_RST_REQ 0x01 +#define GSW1XX_RST_REQ_SGMII_SHELL BIT(5) +/* RGMII PAD Slew Control Register */ +#define GSW1XX_SHELL_RGMII_SLEW_CFG 0x78 +#define RGMII_SLEW_CFG_RX_2_5_V BIT(4) +#define RGMII_SLEW_CFG_TX_2_5_V BIT(5) + +/* SGMII clock 
related settings */ +#define GSW1XX_CLK_BASE 0xf900 +#define GSW1XX_CLK_NCO_CTRL 0x68 +#define GSW1XX_SGMII_HSP_MASK GENMASK(3, 2) +#define GSW1XX_SGMII_SEL BIT(1) +#define GSW1XX_SGMII_1G 0x0 +#define GSW1XX_SGMII_2G5 0xc +#define GSW1XX_SGMII_1G_NCO1 0x0 +#define GSW1XX_SGMII_2G5_NCO2 0x2 + +#endif /* __MXL_GSW1XX_H */ diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx_pce.h b/drivers/net/dsa/lantiq/mxl-gsw1xx_pce.h new file mode 100644 index 0000000000000..eefcd411a3403 --- /dev/null +++ b/drivers/net/dsa/lantiq/mxl-gsw1xx_pce.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PCE microcode code update for driver for MaxLinear GSW1xx switch chips + * + * Copyright (C) 2023 - 2024 MaxLinear Inc. + * Copyright (C) 2022 Snap One, LLC. All rights reserved. + * Copyright (C) 2017 - 2019 Hauke Mehrtens + * Copyright (C) 2012 John Crispin + * Copyright (C) 2010 Lantiq Deutschland + */ + +#include "lantiq_gswip.h" + +#define INSTR 0 +#define IPV6 1 +#define LENACCU 2 + +/* GSWIP_2.X */ +enum { + OUT_MAC0 = 0, + OUT_MAC1, + OUT_MAC2, + OUT_MAC3, + OUT_MAC4, + OUT_MAC5, + OUT_ETHTYP, + OUT_VTAG0, + OUT_VTAG1, + OUT_ITAG0, + OUT_ITAG1, /* 10 */ + OUT_ITAG2, + OUT_ITAG3, + OUT_IP0, + OUT_IP1, + OUT_IP2, + OUT_IP3, + OUT_SIP0, + OUT_SIP1, + OUT_SIP2, + OUT_SIP3, /* 20 */ + OUT_SIP4, + OUT_SIP5, + OUT_SIP6, + OUT_SIP7, + OUT_DIP0, + OUT_DIP1, + OUT_DIP2, + OUT_DIP3, + OUT_DIP4, + OUT_DIP5, /* 30 */ + OUT_DIP6, + OUT_DIP7, + OUT_SESID, + OUT_PROT, + OUT_APP0, + OUT_APP1, + OUT_IGMP0, + OUT_IGMP1, + OUT_STAG0 = 61, + OUT_STAG1 = 62, + OUT_NONE = 63, +}; + +/* parser's microcode flag type */ +enum { + FLAG_ITAG = 0, + FLAG_VLAN, + FLAG_SNAP, + FLAG_PPPOE, + FLAG_IPV6, + FLAG_IPV6FL, + FLAG_IPV4, + FLAG_IGMP, + FLAG_TU, + FLAG_HOP, + FLAG_NN1, /* 10 */ + FLAG_NN2, + FLAG_END, + FLAG_NO, /* 13 */ + FLAG_SVLAN, /* 14 */ +}; + +#define PCE_MC_M(val, msk, ns, out, len, type, flags, ipv4_len) \ + { (val), (msk), ((ns) << 10 | (out) << 4 | (len) >> 1),\ + ((len) & 1) << 15 
| (type) << 13 | (flags) << 9 | (ipv4_len) << 8 } + +/* V22_2X (IPv6 issue fixed) */ +static const struct gswip_pce_microcode gsw1xx_pce_microcode[] = { + /* value mask ns fields L type flags ipv4_len */ + PCE_MC_M(0x88c3, 0xFFFF, 1, OUT_ITAG0, 4, INSTR, FLAG_ITAG, 0), + PCE_MC_M(0x8100, 0xFFFF, 4, OUT_STAG0, 2, INSTR, FLAG_SVLAN, 0), + PCE_MC_M(0x88A8, 0xFFFF, 4, OUT_STAG0, 2, INSTR, FLAG_SVLAN, 0), + PCE_MC_M(0x9100, 0xFFFF, 4, OUT_STAG0, 2, INSTR, FLAG_SVLAN, 0), + PCE_MC_M(0x8100, 0xFFFF, 5, OUT_VTAG0, 2, INSTR, FLAG_VLAN, 0), + PCE_MC_M(0x88A8, 0xFFFF, 6, OUT_VTAG0, 2, INSTR, FLAG_VLAN, 0), + PCE_MC_M(0x9100, 0xFFFF, 4, OUT_VTAG0, 2, INSTR, FLAG_VLAN, 0), + PCE_MC_M(0x8864, 0xFFFF, 20, OUT_ETHTYP, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0800, 0xFFFF, 24, OUT_ETHTYP, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x86DD, 0xFFFF, 25, OUT_ETHTYP, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x8863, 0xFFFF, 19, OUT_ETHTYP, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0xF800, 13, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 44, OUT_ETHTYP, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0600, 0x0600, 44, OUT_ETHTYP, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 15, OUT_NONE, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0xAAAA, 0xFFFF, 17, OUT_NONE, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0300, 0xFF00, 45, OUT_NONE, 0, INSTR, FLAG_SNAP, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_DIP7, 3, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 21, OUT_DIP7, 3, INSTR, FLAG_PPPOE, 0), + PCE_MC_M(0x0021, 0xFFFF, 24, OUT_NONE, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0057, 0xFFFF, 25, OUT_NONE, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 44, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x4000, 0xF000, 27, OUT_IP0, 4, INSTR, FLAG_IPV4, 1), + PCE_MC_M(0x6000, 0xF000, 30, OUT_IP0, 3, INSTR, FLAG_IPV6, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 28, 
OUT_IP3, 2, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 29, OUT_SIP0, 4, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 44, OUT_NONE, 0, LENACCU, FLAG_NO, 0), + PCE_MC_M(0x1100, 0xFF00, 43, OUT_PROT, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0600, 0xFF00, 43, OUT_PROT, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0xFF00, 36, OUT_IP3, 17, INSTR, FLAG_HOP, 0), + PCE_MC_M(0x2B00, 0xFF00, 36, OUT_IP3, 17, INSTR, FLAG_NN1, 0), + PCE_MC_M(0x3C00, 0xFF00, 36, OUT_IP3, 17, INSTR, FLAG_NN2, 0), + PCE_MC_M(0x0000, 0x0000, 43, OUT_PROT, 1, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x00F0, 38, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 44, OUT_NONE, 0, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0xFF00, 36, OUT_NONE, 0, IPV6, FLAG_HOP, 0), + PCE_MC_M(0x2B00, 0xFF00, 36, OUT_NONE, 0, IPV6, FLAG_NN1, 0), + PCE_MC_M(0x3C00, 0xFF00, 36, OUT_NONE, 0, IPV6, FLAG_NN2, 0), + PCE_MC_M(0x0000, 0x00FC, 44, OUT_PROT, 0, IPV6, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 44, OUT_NONE, 0, IPV6, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 44, OUT_SIP0, 16, INSTR, FLAG_NO, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_APP0, 4, INSTR, FLAG_IGMP, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 
45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), + PCE_MC_M(0x0000, 0x0000, 45, OUT_NONE, 0, INSTR, FLAG_END, 0), +}; From 12ed3e5a03a8a5150977c2fa7e4dd739320592bc Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 30 Oct 2025 14:59:45 +0100 Subject: [PATCH 626/867] ice: add flow parsing for GTP and new protocol field support Introduce new protocol header types and field sizes to support GTPU, GTPC tunneling protocols. - Add field size macros for GTP TEID, QFI, and other headers - Extend ice_flow_field_info and enum definitions - Update hash macros for new protocols - Add support for IPv6 prefix matching and fragment headers This patch lays the groundwork for enhanced RSS and flow classification capabilities. Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Co-developed-by: Junfeng Guo Signed-off-by: Junfeng Guo Co-developed-by: Ting Xu Signed-off-by: Ting Xu Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flow.c | 217 +++++++++++++++++- drivers/net/ethernet/intel/ice/ice_flow.h | 94 +++++++- .../ethernet/intel/ice/ice_protocol_type.h | 20 ++ 3 files changed, 322 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index 6d5c939dc8a51..a2f2a612428d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -5,6 +5,38 @@ #include "ice_flow.h" #include +/* Size of known protocol header fields */ +#define ICE_FLOW_FLD_SZ_ETH_TYPE 2 +#define ICE_FLOW_FLD_SZ_VLAN 2 +#define ICE_FLOW_FLD_SZ_IPV4_ADDR 4 +#define ICE_FLOW_FLD_SZ_IPV6_ADDR 16 +#define ICE_FLOW_FLD_SZ_IPV6_PRE32_ADDR 4 
+#define ICE_FLOW_FLD_SZ_IPV6_PRE48_ADDR 6 +#define ICE_FLOW_FLD_SZ_IPV6_PRE64_ADDR 8 +#define ICE_FLOW_FLD_SZ_IPV4_ID 2 +#define ICE_FLOW_FLD_SZ_IPV6_ID 4 +#define ICE_FLOW_FLD_SZ_IP_CHKSUM 2 +#define ICE_FLOW_FLD_SZ_TCP_CHKSUM 2 +#define ICE_FLOW_FLD_SZ_UDP_CHKSUM 2 +#define ICE_FLOW_FLD_SZ_SCTP_CHKSUM 4 +#define ICE_FLOW_FLD_SZ_IP_DSCP 1 +#define ICE_FLOW_FLD_SZ_IP_TTL 1 +#define ICE_FLOW_FLD_SZ_IP_PROT 1 +#define ICE_FLOW_FLD_SZ_PORT 2 +#define ICE_FLOW_FLD_SZ_TCP_FLAGS 1 +#define ICE_FLOW_FLD_SZ_ICMP_TYPE 1 +#define ICE_FLOW_FLD_SZ_ICMP_CODE 1 +#define ICE_FLOW_FLD_SZ_ARP_OPER 2 +#define ICE_FLOW_FLD_SZ_GRE_KEYID 4 +#define ICE_FLOW_FLD_SZ_GTP_TEID 4 +#define ICE_FLOW_FLD_SZ_GTP_QFI 2 +#define ICE_FLOW_FLD_SZ_PFCP_SEID 8 +#define ICE_FLOW_FLD_SZ_ESP_SPI 4 +#define ICE_FLOW_FLD_SZ_AH_SPI 4 +#define ICE_FLOW_FLD_SZ_NAT_T_ESP_SPI 4 +#define ICE_FLOW_FLD_SZ_L2TPV2_SESS_ID 2 +#define ICE_FLOW_FLD_SZ_L2TPV2_LEN_SESS_ID 2 + /* Describe properties of a protocol header field */ struct ice_flow_field_info { enum ice_flow_seg_hdr hdr; @@ -20,6 +52,7 @@ struct ice_flow_field_info { .mask = 0, \ } +/* QFI: 6-bit field in GTP-U PDU Session Container (3GPP TS 38.415) */ #define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \ .hdr = _hdr, \ .off = (_offset_bytes) * BITS_PER_BYTE, \ @@ -61,7 +94,33 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { /* ICE_FLOW_FIELD_IDX_IPV6_SA */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)), /* ICE_FLOW_FIELD_IDX_IPV6_DA */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, ICE_FLOW_FLD_SZ_IPV6_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV4_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 10, ICE_FLOW_FLD_SZ_IP_CHKSUM), + /* ICE_FLOW_FIELD_IDX_IPV4_FRAG */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV_FRAG, 4, + ICE_FLOW_FLD_SZ_IPV4_ID), + /* ICE_FLOW_FIELD_IDX_IPV6_FRAG */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV_FRAG, 4, 
+ ICE_FLOW_FLD_SZ_IPV6_ID), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE32_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, + ICE_FLOW_FLD_SZ_IPV6_PRE32_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE32_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, + ICE_FLOW_FLD_SZ_IPV6_PRE32_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE48_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, + ICE_FLOW_FLD_SZ_IPV6_PRE48_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE48_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, + ICE_FLOW_FLD_SZ_IPV6_PRE48_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, + ICE_FLOW_FLD_SZ_IPV6_PRE64_ADDR), + /* ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, + ICE_FLOW_FLD_SZ_IPV6_PRE64_ADDR), /* Transport */ /* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)), @@ -76,7 +135,14 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)), /* ICE_FLOW_FIELD_IDX_TCP_FLAGS */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, ICE_FLOW_FLD_SZ_TCP_FLAGS), + /* ICE_FLOW_FIELD_IDX_TCP_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 16, ICE_FLOW_FLD_SZ_TCP_CHKSUM), + /* ICE_FLOW_FIELD_IDX_UDP_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 6, ICE_FLOW_FLD_SZ_UDP_CHKSUM), + /* ICE_FLOW_FIELD_IDX_SCTP_CHKSUM */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 8, + ICE_FLOW_FLD_SZ_SCTP_CHKSUM), /* ARP */ /* ICE_FLOW_FIELD_IDX_ARP_SIP */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)), @@ -108,9 +174,17 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16), 0x3f00), /* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)), + 
ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, + ICE_FLOW_FLD_SZ_GTP_TEID), + /* ICE_FLOW_FIELD_IDX_GTPU_UP_QFI */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_UP, 22, + ICE_FLOW_FLD_SZ_GTP_QFI, 0x3f00), /* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, + ICE_FLOW_FLD_SZ_GTP_TEID), + /* ICE_FLOW_FIELD_IDX_GTPU_DWN_QFI */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_DWN, 22, + ICE_FLOW_FLD_SZ_GTP_QFI, 0x3f00), /* PPPoE */ /* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)), @@ -128,7 +202,16 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)), /* NAT_T_ESP */ /* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */ - ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)), + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, + ICE_FLOW_FLD_SZ_NAT_T_ESP_SPI), + /* L2TPV2 */ + /* ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV2, 12, + ICE_FLOW_FLD_SZ_L2TPV2_SESS_ID), + /* L2TPV2_LEN */ + /* ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV2, 14, + ICE_FLOW_FLD_SZ_L2TPV2_LEN_SESS_ID), }; /* Bitmaps indicating relevant packet types for a particular protocol header @@ -2324,6 +2407,130 @@ static void ice_rss_set_symm(struct ice_hw *hw, struct ice_flow_prof *prof) } } +/** + * ice_rss_cfg_raw_symm - Configure symmetric RSS for a raw parser profile + * @hw: device HW + * @prof: parser profile describing extracted FV (field vector) entries + * @prof_id: RSS profile identifier used to program symmetry registers + * + * The routine scans the parser profile's FV entries and looks for + * direction-sensitive pairs (L3 src/dst, L4 src/dst). When a pair is found, + * it programs XOR-based symmetry so that flows hash identically regardless + * of packet direction. 
This preserves CPU affinity for the same 5-tuple. + * + * Notes: + * - The size of each logical field (IPv4/IPv6 address, L4 port) is expressed + * in units of ICE_FLOW_FV_EXTRACT_SZ so we can step across fv[] correctly. + * - We guard against out-of-bounds access before looking at fv[i + len]. + */ +static void ice_rss_cfg_raw_symm(struct ice_hw *hw, + const struct ice_parser_profile *prof, + u64 prof_id) +{ + for (size_t i = 0; i < prof->fv_num; i++) { + u8 proto_id = prof->fv[i].proto_id; + u16 src_off = 0, dst_off = 0; + size_t src_idx, dst_idx; + bool is_matched = false; + unsigned int len = 0; + + switch (proto_id) { + /* IPv4 address pairs (outer/inner variants) */ + case ICE_PROT_IPV4_OF_OR_S: + case ICE_PROT_IPV4_IL: + case ICE_PROT_IPV4_IL_IL: + len = ICE_FLOW_FLD_SZ_IPV4_ADDR / + ICE_FLOW_FV_EXTRACT_SZ; + src_off = ICE_FLOW_FIELD_IPV4_SRC_OFFSET; + dst_off = ICE_FLOW_FIELD_IPV4_DST_OFFSET; + break; + + /* IPv6 address pairs (outer/inner variants) */ + case ICE_PROT_IPV6_OF_OR_S: + case ICE_PROT_IPV6_IL: + case ICE_PROT_IPV6_IL_IL: + len = ICE_FLOW_FLD_SZ_IPV6_ADDR / + ICE_FLOW_FV_EXTRACT_SZ; + src_off = ICE_FLOW_FIELD_IPV6_SRC_OFFSET; + dst_off = ICE_FLOW_FIELD_IPV6_DST_OFFSET; + break; + + /* L4 port pairs (TCP/UDP/SCTP) */ + case ICE_PROT_TCP_IL: + case ICE_PROT_UDP_IL_OR_S: + case ICE_PROT_SCTP_IL: + len = ICE_FLOW_FLD_SZ_PORT / ICE_FLOW_FV_EXTRACT_SZ; + src_off = ICE_FLOW_FIELD_SRC_PORT_OFFSET; + dst_off = ICE_FLOW_FIELD_DST_PORT_OFFSET; + break; + + default: + continue; + } + + /* Bounds check before accessing fv[i + len]. */ + if (i + len >= prof->fv_num) + continue; + + /* Verify src/dst pairing for this protocol id. */ + is_matched = prof->fv[i].offset == src_off && + prof->fv[i + len].proto_id == proto_id && + prof->fv[i + len].offset == dst_off; + if (!is_matched) + continue; + + /* Program XOR symmetry for this field pair. 
*/ + src_idx = i; + dst_idx = i + len; + + ice_rss_config_xor(hw, prof_id, src_idx, dst_idx, len); + + /* Skip over the pair we just handled; the loop's ++i advances + * one more element, hence the --i after the jump. + */ + i += (2 * len); + /* not strictly needed; keeps static analyzers happy */ + if (i == 0) + break; + --i; + } +} + +/* Number of symmetric hash registers per profile (exclusive index bound) */ +#define ICE_SYMM_REG_INDEX_MAX 6 + +/** + * ice_rss_update_raw_symm - update symmetric hash configuration + * for raw pattern + * @hw: pointer to the hardware structure + * @cfg: configuration parameters for raw pattern + * @id: profile tracking ID + * + * Update symmetric hash configuration for raw pattern if required. + * Otherwise only clear to default. + */ +void +ice_rss_update_raw_symm(struct ice_hw *hw, + struct ice_rss_raw_cfg *cfg, u64 id) +{ + struct ice_prof_map *map; + u8 prof_id, m; + + mutex_lock(&hw->blk[ICE_BLK_RSS].es.prof_map_lock); + map = ice_search_prof_id(hw, ICE_BLK_RSS, id); + if (map) + prof_id = map->prof_id; + mutex_unlock(&hw->blk[ICE_BLK_RSS].es.prof_map_lock); + if (!map) + return; + /* clear to default */ + for (m = 0; m < ICE_SYMM_REG_INDEX_MAX; m++) + wr32(hw, GLQF_HSYMM(prof_id, m), 0); + + if (cfg->symm) + ice_rss_cfg_raw_symm(hw, &cfg->prof, prof_id); +} + /** * ice_add_rss_cfg_sync - add an RSS configuration * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h index 52f906d89eca1..6c6cdc8addb12 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -22,6 +22,15 @@ #define ICE_FLOW_HASH_IPV6 \ (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)) +#define ICE_FLOW_HASH_IPV6_PRE32 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE32_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE32_DA)) +#define ICE_FLOW_HASH_IPV6_PRE48 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE48_SA) | \ + 
BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE48_DA)) +#define ICE_FLOW_HASH_IPV6_PRE64 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA)) #define ICE_FLOW_HASH_TCP_PORT \ (BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \ BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)) @@ -40,6 +49,33 @@ #define ICE_HASH_SCTP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_SCTP_PORT) #define ICE_HASH_SCTP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_SCTP_PORT) +#define ICE_HASH_TCP_IPV6_PRE32 \ + (ICE_FLOW_HASH_IPV6_PRE32 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV6_PRE32 \ + (ICE_FLOW_HASH_IPV6_PRE32 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_SCTP_IPV6_PRE32 \ + (ICE_FLOW_HASH_IPV6_PRE32 | ICE_FLOW_HASH_SCTP_PORT) +#define ICE_HASH_TCP_IPV6_PRE48 \ + (ICE_FLOW_HASH_IPV6_PRE48 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV6_PRE48 \ + (ICE_FLOW_HASH_IPV6_PRE48 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_SCTP_IPV6_PRE48 \ + (ICE_FLOW_HASH_IPV6_PRE48 | ICE_FLOW_HASH_SCTP_PORT) +#define ICE_HASH_TCP_IPV6_PRE64 \ + (ICE_FLOW_HASH_IPV6_PRE64 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV6_PRE64 \ + (ICE_FLOW_HASH_IPV6_PRE64 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_SCTP_IPV6_PRE64 \ + (ICE_FLOW_HASH_IPV6_PRE64 | ICE_FLOW_HASH_SCTP_PORT) + +#define ICE_FLOW_HASH_GTP_TEID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)) + +#define ICE_FLOW_HASH_GTP_IPV4_TEID \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID) +#define ICE_FLOW_HASH_GTP_IPV6_TEID \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID) + #define ICE_FLOW_HASH_GTP_C_TEID \ (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)) @@ -128,6 +164,23 @@ #define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \ (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI) +#define ICE_FLOW_HASH_L2TPV2_SESS_ID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID)) +#define ICE_FLOW_HASH_L2TPV2_SESS_ID_ETH \ + (ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_L2TPV2_SESS_ID) + +#define ICE_FLOW_HASH_L2TPV2_LEN_SESS_ID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID)) 
+#define ICE_FLOW_HASH_L2TPV2_LEN_SESS_ID_ETH \ + (ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_L2TPV2_LEN_SESS_ID) + +#define ICE_FLOW_FIELD_IPV4_SRC_OFFSET 12 +#define ICE_FLOW_FIELD_IPV4_DST_OFFSET 16 +#define ICE_FLOW_FIELD_IPV6_SRC_OFFSET 8 +#define ICE_FLOW_FIELD_IPV6_DST_OFFSET 24 +#define ICE_FLOW_FIELD_SRC_PORT_OFFSET 0 +#define ICE_FLOW_FIELD_DST_PORT_OFFSET 2 + /* Protocol header fields within a packet segment. A segment consists of one or * more protocol headers that make up a logical group of protocol headers. Each * logical group of protocol headers encapsulates or is encapsulated using/by @@ -160,10 +213,13 @@ enum ice_flow_seg_hdr { ICE_FLOW_SEG_HDR_AH = 0x00200000, ICE_FLOW_SEG_HDR_NAT_T_ESP = 0x00400000, ICE_FLOW_SEG_HDR_ETH_NON_IP = 0x00800000, + ICE_FLOW_SEG_HDR_GTPU_NON_IP = 0x01000000, + ICE_FLOW_SEG_HDR_L2TPV2 = 0x10000000, /* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and - * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs + * ICE_FLOW_SEG_HDR_IPV6. 
*/ - ICE_FLOW_SEG_HDR_IPV_OTHER = 0x20000000, + ICE_FLOW_SEG_HDR_IPV_FRAG = 0x40000000, + ICE_FLOW_SEG_HDR_IPV_OTHER = 0x80000000, }; /* These segments all have the same PTYPES, but are otherwise distinguished by @@ -200,6 +256,15 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_IPV4_DA, ICE_FLOW_FIELD_IDX_IPV6_SA, ICE_FLOW_FIELD_IDX_IPV6_DA, + ICE_FLOW_FIELD_IDX_IPV4_CHKSUM, + ICE_FLOW_FIELD_IDX_IPV4_ID, + ICE_FLOW_FIELD_IDX_IPV6_ID, + ICE_FLOW_FIELD_IDX_IPV6_PRE32_SA, + ICE_FLOW_FIELD_IDX_IPV6_PRE32_DA, + ICE_FLOW_FIELD_IDX_IPV6_PRE48_SA, + ICE_FLOW_FIELD_IDX_IPV6_PRE48_DA, + ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA, + ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA, /* L4 */ ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, @@ -208,6 +273,9 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_FLAGS, + ICE_FLOW_FIELD_IDX_TCP_CHKSUM, + ICE_FLOW_FIELD_IDX_UDP_CHKSUM, + ICE_FLOW_FIELD_IDX_SCTP_CHKSUM, /* ARP */ ICE_FLOW_FIELD_IDX_ARP_SIP, ICE_FLOW_FIELD_IDX_ARP_DIP, @@ -228,13 +296,13 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, /* GTPU_UP */ ICE_FLOW_FIELD_IDX_GTPU_UP_TEID, + ICE_FLOW_FIELD_IDX_GTPU_UP_QFI, /* GTPU_DWN */ ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID, - /* PPPoE */ + ICE_FLOW_FIELD_IDX_GTPU_DWN_QFI, ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID, /* PFCP */ ICE_FLOW_FIELD_IDX_PFCP_SEID, - /* L2TPv3 */ ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, /* ESP */ ICE_FLOW_FIELD_IDX_ESP_SPI, @@ -242,10 +310,16 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_AH_SPI, /* NAT_T ESP */ ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI, + /* L2TPV2 SESSION ID*/ + ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID, + /* L2TPV2_LEN SESSION ID */ + ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID, /* The total number of enums must not exceed 64 */ ICE_FLOW_FIELD_IDX_MAX }; +static_assert(ICE_FLOW_FIELD_IDX_MAX <= 64, "The total number of enums must not exceed 64"); + #define ICE_FLOW_HASH_FLD_IPV4_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) #define ICE_FLOW_HASH_FLD_IPV6_SA 
BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) #define ICE_FLOW_HASH_FLD_IPV4_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) @@ -296,6 +370,10 @@ enum ice_rss_cfg_hdr_type { /* take inner headers as inputset for packet with outer ipv6. */ ICE_RSS_INNER_HEADERS_W_OUTER_IPV6, /* take outer headers first then inner headers as inputset */ + /* take inner as inputset for GTPoGRE with outer IPv4 + GRE. */ + ICE_RSS_INNER_HEADERS_W_OUTER_IPV4_GRE, + /* take inner as inputset for GTPoGRE with outer IPv6 + GRE. */ + ICE_RSS_INNER_HEADERS_W_OUTER_IPV6_GRE, ICE_RSS_ANY_HEADERS }; @@ -406,6 +484,12 @@ struct ice_flow_prof { bool symm; /* Symmetric Hash for RSS */ }; +struct ice_rss_raw_cfg { + struct ice_parser_profile prof; + bool raw_ena; + bool symm; +}; + struct ice_rss_cfg { struct list_head l_entry; /* bitmap of VSIs added to the RSS entry */ @@ -444,4 +528,6 @@ int ice_add_rss_cfg(struct ice_hw *hw, struct ice_vsi *vsi, int ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, const struct ice_rss_hash_cfg *cfg); u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs, bool *symm); +void ice_rss_update_raw_symm(struct ice_hw *hw, + struct ice_rss_raw_cfg *cfg, u64 id); #endif /* _ICE_FLOW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 7c09ea0f03ba6..725167d557a8a 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -82,26 +82,46 @@ enum ice_sw_tunnel_type { enum ice_prot_id { ICE_PROT_ID_INVAL = 0, ICE_PROT_MAC_OF_OR_S = 1, + ICE_PROT_MAC_O2 = 2, ICE_PROT_MAC_IL = 4, + ICE_PROT_MAC_IN_MAC = 7, ICE_PROT_ETYPE_OL = 9, ICE_PROT_ETYPE_IL = 10, + ICE_PROT_PAY = 15, + ICE_PROT_EVLAN_O = 16, + ICE_PROT_VLAN_O = 17, + ICE_PROT_VLAN_IF = 18, + ICE_PROT_MPLS_OL_MINUS_1 = 27, + ICE_PROT_MPLS_OL_OR_OS = 28, + ICE_PROT_MPLS_IL = 29, ICE_PROT_IPV4_OF_OR_S = 32, ICE_PROT_IPV4_IL = 33, + ICE_PROT_IPV4_IL_IL = 34, ICE_PROT_IPV6_OF_OR_S = 40, 
ICE_PROT_IPV6_IL = 41, + ICE_PROT_IPV6_IL_IL = 42, + ICE_PROT_IPV6_NEXT_PROTO = 43, + ICE_PROT_IPV6_FRAG = 47, ICE_PROT_TCP_IL = 49, ICE_PROT_UDP_OF = 52, ICE_PROT_UDP_IL_OR_S = 53, ICE_PROT_GRE_OF = 64, + ICE_PROT_NSH_F = 84, ICE_PROT_ESP_F = 88, ICE_PROT_ESP_2 = 89, ICE_PROT_SCTP_IL = 96, ICE_PROT_ICMP_IL = 98, ICE_PROT_ICMPV6_IL = 100, + ICE_PROT_VRRP_F = 101, + ICE_PROT_OSPF = 102, ICE_PROT_PPPOE = 103, ICE_PROT_L2TPV3 = 104, + ICE_PROT_ATAOE_OF = 114, + ICE_PROT_CTRL_OF = 116, + ICE_PROT_LLDP_OF = 117, ICE_PROT_ARP_OF = 118, ICE_PROT_META_ID = 255, /* when offset == metadata */ + ICE_PROT_EAPOL_OF = 120, ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; From 38724a474c0fc37b6604e8b20c75d87446fc2fd1 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 30 Oct 2025 14:59:46 +0100 Subject: [PATCH 627/867] ice: add virtchnl definitions and static data for GTP RSS Add virtchnl protocol header and field definitions for advanced RSS configuration including GTPC, GTPU, L2TPv2, ECPRI, PPP, GRE, and IP fragment headers. - Define new virtchnl protocol header types - Add RSS field selectors for tunnel protocols - Extend static mapping arrays for protocol field matching - Add L2TPv2 session ID and length+session ID field support This provides the foundational definitions needed for VF RSS configuration of tunnel protocols. 
Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Co-developed-by: Jie Wang Signed-off-by: Jie Wang Co-developed-by: Junfeng Guo Signed-off-by: Junfeng Guo Co-developed-by: Qi Zhang Signed-off-by: Qi Zhang Co-developed-by: Ting Xu Signed-off-by: Ting Xu Signed-off-by: Przemek Kitszel Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_vf_lib.h | 48 +++++ drivers/net/ethernet/intel/ice/virt/rss.c | 219 +++++++++++++++++++- include/linux/avf/virtchnl.h | 50 +++++ 3 files changed, 316 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index b007089071767..7a9c75d1d07cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -53,6 +53,46 @@ struct ice_mdd_vf_events { u16 last_printed; }; +enum ice_hash_ip_ctx_type { + ICE_HASH_IP_CTX_IP = 0, + ICE_HASH_IP_CTX_IP_ESP, + ICE_HASH_IP_CTX_IP_UDP_ESP, + ICE_HASH_IP_CTX_IP_AH, + ICE_HASH_IP_CTX_IP_PFCP, + ICE_HASH_IP_CTX_IP_UDP, + ICE_HASH_IP_CTX_IP_TCP, + ICE_HASH_IP_CTX_IP_SCTP, + ICE_HASH_IP_CTX_MAX, +}; + +struct ice_vf_hash_ip_ctx { + struct ice_rss_hash_cfg ctx[ICE_HASH_IP_CTX_MAX]; +}; + +enum ice_hash_gtpu_ctx_type { + ICE_HASH_GTPU_CTX_EH_IP = 0, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + ICE_HASH_GTPU_CTX_UP_IP, + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, + ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + ICE_HASH_GTPU_CTX_MAX, +}; + +struct ice_vf_hash_gtpu_ctx { + struct ice_rss_hash_cfg ctx[ICE_HASH_GTPU_CTX_MAX]; +}; + +struct ice_vf_hash_ctx { + struct ice_vf_hash_ip_ctx v4; + struct ice_vf_hash_ip_ctx v6; + struct ice_vf_hash_gtpu_ctx ipv4; + struct ice_vf_hash_gtpu_ctx ipv6; +}; + /* Structure to store fdir fv entry */ struct ice_fdir_prof_info { struct ice_parser_profile prof; @@ -66,6 +106,12 @@ struct ice_vf_qs_bw { u8 tc; }; +/* Structure to 
store RSS field vector entry */ +struct ice_rss_prof_info { + struct ice_parser_profile prof; + bool symm; +}; + /* VF operations */ struct ice_vf_ops { enum ice_disq_rst_src reset_type; @@ -106,6 +152,8 @@ struct ice_vf { u16 ctrl_vsi_idx; struct ice_vf_fdir fdir; struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS]; + struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS]; + struct ice_vf_hash_ctx hash_ctx; u64 rss_hashcfg; /* RSS hash configuration */ struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; diff --git a/drivers/net/ethernet/intel/ice/virt/rss.c b/drivers/net/ethernet/intel/ice/virt/rss.c index cbdbb32d512b2..ee0d1ec32d566 100644 --- a/drivers/net/ethernet/intel/ice/virt/rss.c +++ b/drivers/net/ethernet/intel/ice/virt/rss.c @@ -36,6 +36,11 @@ static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = { {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP}, {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, + {VIRTCHNL_PROTO_HDR_GTPC, ICE_FLOW_SEG_HDR_GTPC}, + {VIRTCHNL_PROTO_HDR_L2TPV2, ICE_FLOW_SEG_HDR_L2TPV2}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, ICE_FLOW_SEG_HDR_IPV_FRAG}, + {VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, ICE_FLOW_SEG_HDR_IPV_FRAG}, + {VIRTCHNL_PROTO_HDR_GRE, ICE_FLOW_SEG_HDR_GRE}, }; struct ice_vc_hash_field_match_type { @@ -87,8 +92,125 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + {VIRTCHNL_PROTO_HDR_IPV4, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_ID)}, + {VIRTCHNL_PROTO_HDR_IPV4, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + ICE_FLOW_HASH_IPV4}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_ID)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_IPV4_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_CHKSUM)}, {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), @@ -110,6 +232,35 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_ID)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST), + ICE_FLOW_HASH_IPV6_PRE64}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA)}, + 
{VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + ICE_FLOW_HASH_IPV6_PRE64 | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PRE64_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, {VIRTCHNL_PROTO_HDR_TCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, @@ -120,6 +271,25 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), ICE_FLOW_HASH_TCP_PORT}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_CHKSUM), + ICE_FLOW_HASH_TCP_PORT | + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_CHKSUM)}, {VIRTCHNL_PROTO_HDR_UDP, 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, @@ -130,6 +300,25 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), ICE_FLOW_HASH_UDP_PORT}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_CHKSUM), + ICE_FLOW_HASH_UDP_PORT | + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_CHKSUM)}, {VIRTCHNL_PROTO_HDR_SCTP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, @@ -140,6 +329,25 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), ICE_FLOW_HASH_SCTP_PORT}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT) | + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, + 
{VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_CHKSUM), + ICE_FLOW_HASH_SCTP_PORT | + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_CHKSUM)}, {VIRTCHNL_PROTO_HDR_PPPOE, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, @@ -155,6 +363,15 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, + {VIRTCHNL_PROTO_HDR_GTPC, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPC_TEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)}, + {VIRTCHNL_PROTO_HDR_L2TPV2, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_L2TPV2, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID)}, }; /** diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 5be1881abbb66..11bdab5522fd5 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -1253,6 +1253,17 @@ enum virtchnl_proto_hdr_type { VIRTCHNL_PROTO_HDR_ESP, VIRTCHNL_PROTO_HDR_AH, VIRTCHNL_PROTO_HDR_PFCP, + VIRTCHNL_PROTO_HDR_GTPC, + VIRTCHNL_PROTO_HDR_ECPRI, + VIRTCHNL_PROTO_HDR_L2TPV2, + VIRTCHNL_PROTO_HDR_PPP, + /* IPv4 and IPv6 Fragment header types are only associated to + * VIRTCHNL_PROTO_HDR_IPV4 and VIRTCHNL_PROTO_HDR_IPV6 respectively, + * cannot be used independently. + */ + VIRTCHNL_PROTO_HDR_IPV4_FRAG, + VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, + VIRTCHNL_PROTO_HDR_GRE, }; /* Protocol header field within a protocol header. 
*/ @@ -1275,6 +1286,7 @@ enum virtchnl_proto_hdr_field { VIRTCHNL_PROTO_HDR_IPV4_DSCP, VIRTCHNL_PROTO_HDR_IPV4_TTL, VIRTCHNL_PROTO_HDR_IPV4_PROT, + VIRTCHNL_PROTO_HDR_IPV4_CHKSUM, /* IPV6 */ VIRTCHNL_PROTO_HDR_IPV6_SRC = PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6), @@ -1282,18 +1294,34 @@ enum virtchnl_proto_hdr_field { VIRTCHNL_PROTO_HDR_IPV6_TC, VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, VIRTCHNL_PROTO_HDR_IPV6_PROT, + /* IPV6 Prefix */ + VIRTCHNL_PROTO_HDR_IPV6_PREFIX32_SRC, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX32_DST, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX40_SRC, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX40_DST, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX48_SRC, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX48_DST, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX56_SRC, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX56_DST, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX96_SRC, + VIRTCHNL_PROTO_HDR_IPV6_PREFIX96_DST, /* TCP */ VIRTCHNL_PROTO_HDR_TCP_SRC_PORT = PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP), VIRTCHNL_PROTO_HDR_TCP_DST_PORT, + VIRTCHNL_PROTO_HDR_TCP_CHKSUM, /* UDP */ VIRTCHNL_PROTO_HDR_UDP_SRC_PORT = PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP), VIRTCHNL_PROTO_HDR_UDP_DST_PORT, + VIRTCHNL_PROTO_HDR_UDP_CHKSUM, /* SCTP */ VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT = PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP), VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, + VIRTCHNL_PROTO_HDR_SCTP_CHKSUM, /* GTPU_IP */ VIRTCHNL_PROTO_HDR_GTPU_IP_TEID = PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP), @@ -1317,6 +1345,28 @@ enum virtchnl_proto_hdr_field { VIRTCHNL_PROTO_HDR_PFCP_S_FIELD = PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP), VIRTCHNL_PROTO_HDR_PFCP_SEID, + /* GTPC */ + VIRTCHNL_PROTO_HDR_GTPC_TEID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPC), + /* ECPRI */ + VIRTCHNL_PROTO_HDR_ECPRI_MSG_TYPE = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ECPRI), + VIRTCHNL_PROTO_HDR_ECPRI_PC_RTC_ID, + /* IPv4 Dummy Fragment */ + VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID = + 
PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4_FRAG), + /* IPv6 Extension Fragment */ + VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG), + /* GTPU_DWN/UP */ + VIRTCHNL_PROTO_HDR_GTPU_DWN_QFI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN), + VIRTCHNL_PROTO_HDR_GTPU_UP_QFI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP), + /* L2TPv2 */ + VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV2), + VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID, }; struct virtchnl_proto_hdr { From 3a6d87e2eaaced60411b32c06191a5971c1303c2 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 30 Oct 2025 14:59:47 +0100 Subject: [PATCH 628/867] ice: implement GTP RSS context tracking and configuration This commit implements the core RSS context management and configuration logic for GTP (GTPU) protocol support in VF RSS operations. Key implementation features: - GTPU hash context management with pre/post processing functions - Context index calculation and mapping for different GTPU scenarios - Integration with main RSS configuration flow via wrapper functions - Support for IPv4/IPv6 GTPU RSS configurations - Rollback mechanism for handling RSS rule conflicts - Hash context reset and cleanup functionality The implementation provides comprehensive GTPU RSS support by: 1. Adding ice_add_rss_cfg_pre_gtpu() for preprocessing GTPU contexts 2. Adding ice_add_rss_cfg_post_gtpu() for postprocessing configurations 3. Adding ice_calc_gtpu_ctx_idx() for context index calculation 4. Integrating GTPU logic into ice_add_rss_cfg_wrap() and ice_rem_rss_cfg_wrap() 5. Supporting context tracking in VF hash_ctx structures This completes the GTP RSS infrastructure enabling VFs to configure RSS hashing on GTP-encapsulated traffic. 
Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Co-developed-by: Jie Wang Signed-off-by: Jie Wang Co-developed-by: Junfeng Guo Signed-off-by: Junfeng Guo Co-developed-by: Qi Zhang Signed-off-by: Qi Zhang Co-developed-by: Ting Xu Signed-off-by: Ting Xu Signed-off-by: Przemek Kitszel Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/virt/rss.c | 1094 ++++++++++++++++++++- 1 file changed, 1040 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/virt/rss.c b/drivers/net/ethernet/intel/ice/virt/rss.c index ee0d1ec32d566..085e69ec0cfce 100644 --- a/drivers/net/ethernet/intel/ice/virt/rss.c +++ b/drivers/net/ethernet/intel/ice/virt/rss.c @@ -374,6 +374,43 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV2_LEN_SESS_ID)}, }; +static int +ice_vc_rss_hash_update(struct ice_hw *hw, struct ice_vsi *vsi, u8 hash_type) +{ + struct ice_vsi_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + /* clear previous hash_type */ + ctx->info.q_opt_rss = vsi->info.q_opt_rss & + ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M; + /* hash_type is passed in as ICE_AQ_VSI_Q_OPT_RSS_<XOR|TPLZ|SYM_TPLZ> */ + ctx->info.q_opt_rss |= FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, + hash_type); + + /* Preserve existing queueing option setting */ + ctx->info.q_opt_tc = vsi->info.q_opt_tc; + ctx->info.q_opt_flags = vsi->info.q_opt_flags; + + ctx->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + + ret = ice_update_vsi(hw, vsi->idx, ctx, NULL); + if (ret) { + dev_err(ice_hw_to_dev(hw), "update VSI for RSS failed, err %d aq_err %s\n", + ret, libie_aq_str(hw->adminq.sq_last_status)); + } else { + vsi->info.q_opt_rss = ctx->info.q_opt_rss; + } + + kfree(ctx); + + return ret; +} + /** * ice_vc_validate_pattern * @vf: pointer to the VF info @@ -488,6 +525,11 @@ static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, const struct ice_vc_hash_field_match_type *hf_list; const struct
ice_vc_hdr_match_type *hdr_list; int i, hf_list_len, hdr_list_len; + bool outer_ipv4 = false; + bool outer_ipv6 = false; + bool inner_hdr = false; + bool has_gre = false; + u32 *addl_hdrs = &hash_cfg->addl_hdrs; u64 *hash_flds = &hash_cfg->hash_flds; @@ -507,17 +549,17 @@ static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { struct virtchnl_proto_hdr *proto_hdr = &rss_cfg->proto_hdrs.proto_hdr[i]; - bool hdr_found = false; + u32 hdr_found = 0; int j; - /* Find matched ice headers according to virtchnl headers. */ + /* Find matched ice headers according to virtchnl headers. + * Also figure out the outer type of GTPU headers. + */ for (j = 0; j < hdr_list_len; j++) { struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; - if (proto_hdr->type == hdr_map.vc_hdr) { - *addl_hdrs |= hdr_map.ice_hdr; - hdr_found = true; - } + if (proto_hdr->type == hdr_map.vc_hdr) + hdr_found = hdr_map.ice_hdr; } if (!hdr_found) @@ -535,8 +577,98 @@ static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, break; } } + + if (proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV4 && !inner_hdr) + outer_ipv4 = true; + else if (proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV6 && + !inner_hdr) + outer_ipv6 = true; + /* for GRE and L2TPv2, take inner header as input set if no + * field is selected from outer headers. + * for GTPU, take inner header and GTPU teid as input set.
+ */ + else if ((proto_hdr->type == VIRTCHNL_PROTO_HDR_GTPU_IP || + proto_hdr->type == VIRTCHNL_PROTO_HDR_GTPU_EH || + proto_hdr->type == VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN || + proto_hdr->type == + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP) || + ((proto_hdr->type == VIRTCHNL_PROTO_HDR_L2TPV2 || + proto_hdr->type == VIRTCHNL_PROTO_HDR_GRE) && + *hash_flds == 0)) { + /* set inner_hdr flag, and clean up outer header */ + inner_hdr = true; + + /* clear outer headers */ + *addl_hdrs = 0; + + if (outer_ipv4 && outer_ipv6) + return false; + + if (outer_ipv4) + hash_cfg->hdr_type = ICE_RSS_INNER_HEADERS_W_OUTER_IPV4; + else if (outer_ipv6) + hash_cfg->hdr_type = ICE_RSS_INNER_HEADERS_W_OUTER_IPV6; + else + hash_cfg->hdr_type = ICE_RSS_INNER_HEADERS; + + if (has_gre && outer_ipv4) + hash_cfg->hdr_type = + ICE_RSS_INNER_HEADERS_W_OUTER_IPV4_GRE; + if (has_gre && outer_ipv6) + hash_cfg->hdr_type = + ICE_RSS_INNER_HEADERS_W_OUTER_IPV6_GRE; + + if (proto_hdr->type == VIRTCHNL_PROTO_HDR_GRE) + has_gre = true; + } + + *addl_hdrs |= hdr_found; + + /* refine hash hdrs and fields for IP fragment */ + if (VIRTCHNL_TEST_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID) && + proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV4_FRAG) { + *addl_hdrs |= ICE_FLOW_SEG_HDR_IPV_FRAG; + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_IPV_OTHER); + *hash_flds |= BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_ID); + VIRTCHNL_DEL_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID); + } + if (VIRTCHNL_TEST_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID) && + proto_hdr->type == VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG) { + *addl_hdrs |= ICE_FLOW_SEG_HDR_IPV_FRAG; + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_IPV_OTHER); + *hash_flds |= BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_ID); + VIRTCHNL_DEL_PROTO_HDR_FIELD(proto_hdr, + VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID); + } + } + + /* refine gtpu header if we take outer as input set for a no inner + * ip gtpu flow. 
+ */ + if (hash_cfg->hdr_type == ICE_RSS_OUTER_HEADERS && + *addl_hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) { + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_GTPU_IP); + *addl_hdrs |= ICE_FLOW_SEG_HDR_GTPU_NON_IP; } + /* refine hash field for esp and nat-t-esp. */ + if ((*addl_hdrs & ICE_FLOW_SEG_HDR_UDP) && + (*addl_hdrs & ICE_FLOW_SEG_HDR_ESP)) { + *addl_hdrs &= ~(ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_UDP); + *addl_hdrs |= ICE_FLOW_SEG_HDR_NAT_T_ESP; + *hash_flds &= ~(BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)); + *hash_flds |= BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI); + } + + /* refine hash hdrs for L4 udp/tcp/sctp. */ + if (*addl_hdrs & (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | + ICE_FLOW_SEG_HDR_SCTP) && + *addl_hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER) + *addl_hdrs &= ~ICE_FLOW_SEG_HDR_IPV_OTHER; + return true; } @@ -553,6 +685,874 @@ static bool ice_vf_adv_rss_offload_ena(u32 caps) return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF); } +/** + * ice_is_hash_cfg_valid - Check whether an RSS hash context is valid + * @cfg: RSS hash configuration to test + * + * Return: true if both @cfg->hash_flds and @cfg->addl_hdrs are non-zero; false otherwise. + */ +static bool ice_is_hash_cfg_valid(struct ice_rss_hash_cfg *cfg) +{ + return cfg->hash_flds && cfg->addl_hdrs; +} + +/** + * ice_hash_cfg_reset - Reset an RSS hash context + * @cfg: RSS hash configuration to reset + * + * Reset fields of @cfg that store the active rule information. + */ +static void ice_hash_cfg_reset(struct ice_rss_hash_cfg *cfg) +{ + cfg->hash_flds = 0; + cfg->addl_hdrs = 0; + cfg->hdr_type = ICE_RSS_OUTER_HEADERS; + cfg->symm = 0; +} + +/** + * ice_hash_cfg_record - Record an RSS hash context + * @ctx: destination (global) RSS hash configuration + * @cfg: source RSS hash configuration to record + * + * Copy the active rule information from @cfg into @ctx. 
+ */ +static void ice_hash_cfg_record(struct ice_rss_hash_cfg *ctx, + struct ice_rss_hash_cfg *cfg) +{ + ctx->hash_flds = cfg->hash_flds; + ctx->addl_hdrs = cfg->addl_hdrs; + ctx->hdr_type = cfg->hdr_type; + ctx->symm = cfg->symm; +} + +/** + * ice_hash_moveout - Delete an RSS configuration (keep context) + * @vf: VF pointer + * @cfg: RSS hash configuration + * + * Return: 0 on success (including when already absent); -ENOENT if @cfg is + * invalid or VSI is missing; -EBUSY on hardware removal failure. + */ +static int +ice_hash_moveout(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + if (!ice_is_hash_cfg_valid(cfg) || !vsi) + return -ENOENT; + + ret = ice_rem_rss_cfg(hw, vsi->idx, cfg); + if (ret && ret != -ENOENT) { + dev_err(dev, "ice_rem_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return -EBUSY; + } + + return 0; +} + +/** + * ice_hash_moveback - Add an RSS hash configuration for a VF + * @vf: VF pointer + * @cfg: RSS hash configuration to apply + * + * Add @cfg to @vf if the context is valid and VSI exists; programs HW. 
+ * + * Return: + * * 0 on success + * * -ENOENT if @cfg is invalid or VSI is missing + * * -EBUSY if hardware programming fails + */ +static int +ice_hash_moveback(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + if (!ice_is_hash_cfg_valid(cfg) || !vsi) + return -ENOENT; + + ret = ice_add_rss_cfg(hw, vsi, cfg); + if (ret) { + dev_err(dev, "ice_add_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return -EBUSY; + } + + return 0; +} + +/** + * ice_hash_remove - remove a RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * This function will delete a RSS hash configuration and also delete the + * hash context which stores the rule info. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int +ice_hash_remove(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + int ret; + + ret = ice_hash_moveout(vf, cfg); + if (ret && ret != -ENOENT) + return ret; + + ice_hash_cfg_reset(cfg); + + return 0; +} + +struct ice_gtpu_ctx_action { + u32 ctx_idx; + const u32 *remove_list; + int remove_count; + const u32 *moveout_list; + int moveout_count; +}; + +/** + * ice_add_rss_cfg_pre_gtpu - Pre-process the GTPU RSS configuration + * @vf: pointer to the VF info + * @ctx: pointer to the context of the GTPU hash + * @ctx_idx: index of the hash context + * + * Pre-processes the GTPU hash configuration before adding a new + * hash context. It removes or reorders existing hash configurations that may + * conflict with the new one. For example, if a GTPU_UP or GTPU_DWN rule is + * configured after a GTPU_EH rule, the GTPU_EH hash will be matched first due + * to TCAM write and match order (top-down). In such cases, the GTPU_EH rule + * must be moved after the GTPU_UP/DWN rule. 
Conversely, if a GTPU_EH rule is + * configured after a GTPU_UP/DWN rule, the UP/DWN rules should be removed to + * avoid conflict. + * + * Return: 0 on success or a negative error code on failure + */ +static int ice_add_rss_cfg_pre_gtpu(struct ice_vf *vf, + struct ice_vf_hash_gtpu_ctx *ctx, + u32 ctx_idx) +{ + int ret, i; + + static const u32 remove_eh_ip[] = { + ICE_HASH_GTPU_CTX_EH_IP_UDP, ICE_HASH_GTPU_CTX_EH_IP_TCP, + ICE_HASH_GTPU_CTX_UP_IP, ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + + static const u32 remove_eh_ip_udp[] = { + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, + }; + static const u32 moveout_eh_ip_udp[] = { + ICE_HASH_GTPU_CTX_UP_IP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, + ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + + static const u32 remove_eh_ip_tcp[] = { + ICE_HASH_GTPU_CTX_UP_IP_TCP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + static const u32 moveout_eh_ip_tcp[] = { + ICE_HASH_GTPU_CTX_UP_IP, + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP, + ICE_HASH_GTPU_CTX_DW_IP_UDP, + }; + + static const u32 remove_up_ip[] = { + ICE_HASH_GTPU_CTX_UP_IP_UDP, + ICE_HASH_GTPU_CTX_UP_IP_TCP, + }; + static const u32 moveout_up_ip[] = { + ICE_HASH_GTPU_CTX_EH_IP, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + }; + + static const u32 moveout_up_ip_udp_tcp[] = { + ICE_HASH_GTPU_CTX_EH_IP, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + }; + + static const u32 remove_dw_ip[] = { + ICE_HASH_GTPU_CTX_DW_IP_UDP, + ICE_HASH_GTPU_CTX_DW_IP_TCP, + }; + static const u32 moveout_dw_ip[] = { + ICE_HASH_GTPU_CTX_EH_IP, + ICE_HASH_GTPU_CTX_EH_IP_UDP, + ICE_HASH_GTPU_CTX_EH_IP_TCP, + }; + + static const struct ice_gtpu_ctx_action actions[] = { + { ICE_HASH_GTPU_CTX_EH_IP, remove_eh_ip, + ARRAY_SIZE(remove_eh_ip), NULL, 0 }, + { ICE_HASH_GTPU_CTX_EH_IP_UDP, remove_eh_ip_udp, + ARRAY_SIZE(remove_eh_ip_udp), 
moveout_eh_ip_udp, + ARRAY_SIZE(moveout_eh_ip_udp) }, + { ICE_HASH_GTPU_CTX_EH_IP_TCP, remove_eh_ip_tcp, + ARRAY_SIZE(remove_eh_ip_tcp), moveout_eh_ip_tcp, + ARRAY_SIZE(moveout_eh_ip_tcp) }, + { ICE_HASH_GTPU_CTX_UP_IP, remove_up_ip, + ARRAY_SIZE(remove_up_ip), moveout_up_ip, + ARRAY_SIZE(moveout_up_ip) }, + { ICE_HASH_GTPU_CTX_UP_IP_UDP, NULL, 0, moveout_up_ip_udp_tcp, + ARRAY_SIZE(moveout_up_ip_udp_tcp) }, + { ICE_HASH_GTPU_CTX_UP_IP_TCP, NULL, 0, moveout_up_ip_udp_tcp, + ARRAY_SIZE(moveout_up_ip_udp_tcp) }, + { ICE_HASH_GTPU_CTX_DW_IP, remove_dw_ip, + ARRAY_SIZE(remove_dw_ip), moveout_dw_ip, + ARRAY_SIZE(moveout_dw_ip) }, + { ICE_HASH_GTPU_CTX_DW_IP_UDP, NULL, 0, moveout_dw_ip, + ARRAY_SIZE(moveout_dw_ip) }, + { ICE_HASH_GTPU_CTX_DW_IP_TCP, NULL, 0, moveout_dw_ip, + ARRAY_SIZE(moveout_dw_ip) }, + }; + + for (i = 0; i < ARRAY_SIZE(actions); i++) { + if (actions[i].ctx_idx != ctx_idx) + continue; + + if (actions[i].remove_list) { + for (int j = 0; j < actions[i].remove_count; j++) { + u16 rm = actions[i].remove_list[j]; + + ret = ice_hash_remove(vf, &ctx->ctx[rm]); + if (ret && ret != -ENOENT) + return ret; + } + } + + if (actions[i].moveout_list) { + for (int j = 0; j < actions[i].moveout_count; j++) { + u16 mv = actions[i].moveout_list[j]; + + ret = ice_hash_moveout(vf, &ctx->ctx[mv]); + if (ret && ret != -ENOENT) + return ret; + } + } + break; + } + + return 0; +} + +/** + * ice_add_rss_cfg_pre_ip - Pre-process IP-layer RSS configuration + * @vf: VF pointer + * @ctx: IP L4 hash context (ESP/UDP-ESP/AH/PFCP and UDP/TCP/SCTP) + * + * Remove covered/recorded IP RSS configurations prior to adding a new one. + * + * Return: 0 on success; negative error code on failure. 
+ */ +static int +ice_add_rss_cfg_pre_ip(struct ice_vf *vf, struct ice_vf_hash_ip_ctx *ctx) +{ + int i, ret; + + for (i = 1; i < ICE_HASH_IP_CTX_MAX; i++) + if (ice_is_hash_cfg_valid(&ctx->ctx[i])) { + ret = ice_hash_remove(vf, &ctx->ctx[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * ice_calc_gtpu_ctx_idx - Calculate GTPU hash context index + * @hdrs: Bitmask of protocol headers prefixed with ICE_FLOW_SEG_HDR_* + * + * Determine the GTPU hash context index based on the combination of + * encapsulation headers (GTPU_EH, GTPU_UP, GTPU_DWN) and transport + * protocols (UDP, TCP) within IPv4 or IPv6 flows. + * + * Return: A valid context index (0-8) if the header combination is supported, + * or ICE_HASH_GTPU_CTX_MAX if the combination is invalid. + */ +static enum ice_hash_gtpu_ctx_type ice_calc_gtpu_ctx_idx(u32 hdrs) +{ + u32 eh_idx, ip_idx; + + if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) + eh_idx = 0; + else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) + eh_idx = 1; + else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) + eh_idx = 2; + else + return ICE_HASH_GTPU_CTX_MAX; + + ip_idx = 0; + if (hdrs & ICE_FLOW_SEG_HDR_UDP) + ip_idx = 1; + else if (hdrs & ICE_FLOW_SEG_HDR_TCP) + ip_idx = 2; + + if (hdrs & (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)) + return eh_idx * 3 + ip_idx; + else + return ICE_HASH_GTPU_CTX_MAX; +} + +/** + * ice_map_ip_ctx_idx - map the index of the IP L4 hash context + * @hdrs: protocol headers prefixed with ICE_FLOW_SEG_HDR_XXX. + * + * The IP L4 hash context uses the index to classify IPv4/IPv6 with + * ESP/UDP_ESP/AH/PFCP and non-tunnel UDP/TCP/SCTP. + * This function maps the index based on the protocol headers. + * + * Return: The mapped IP context index on success, or ICE_HASH_IP_CTX_MAX + * if no matching context is found.
+ */ +static u8 ice_map_ip_ctx_idx(u32 hdrs) +{ + u8 i; + + static struct { + u32 hdrs; + u8 ctx_idx; + } ip_ctx_idx_map[] = { + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_ESP, + ICE_HASH_IP_CTX_IP_ESP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_NAT_T_ESP, + ICE_HASH_IP_CTX_IP_UDP_ESP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_AH, + ICE_HASH_IP_CTX_IP_AH }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_PFCP_SESSION, + ICE_HASH_IP_CTX_IP_PFCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_ESP, + ICE_HASH_IP_CTX_IP_ESP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_NAT_T_ESP, + ICE_HASH_IP_CTX_IP_UDP_ESP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_AH, + ICE_HASH_IP_CTX_IP_AH }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER | + ICE_FLOW_SEG_HDR_PFCP_SESSION, + ICE_HASH_IP_CTX_IP_PFCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN | + 
ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + /* the remaining mappings are used for default RSS */ + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP, + ICE_HASH_IP_CTX_IP_UDP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP, + ICE_HASH_IP_CTX_IP_TCP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_SCTP, + ICE_HASH_IP_CTX_IP_SCTP }, + { ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_IPV_OTHER, + ICE_HASH_IP_CTX_IP }, + }; + + for (i = 0; i < ARRAY_SIZE(ip_ctx_idx_map); i++) { + if (hdrs == ip_ctx_idx_map[i].hdrs) + return ip_ctx_idx_map[i].ctx_idx; + } + + return ICE_HASH_IP_CTX_MAX; +} + +/** + * ice_add_rss_cfg_pre - Prepare RSS configuration context for a VF + * @vf: pointer to the VF structure + * @cfg: pointer to the RSS hash configuration + * + * Prepare the RSS hash context for a given VF based on the additional + * protocol headers specified in @cfg. This includes pre-configuration + * for IP and GTPU-based flows. + * + * If the configuration matches a known IP context, the function sets up + * the appropriate IP hash context. If the configuration includes GTPU + * headers, it prepares the GTPU-specific context accordingly. + * + * Return: 0 on success, or a negative error code on failure. 
+ */ +static int +ice_add_rss_cfg_pre(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + u32 ice_gtpu_ctx_idx = ice_calc_gtpu_ctx_idx(cfg->addl_hdrs); + u8 ip_ctx_idx = ice_map_ip_ctx_idx(cfg->addl_hdrs); + + if (ip_ctx_idx == ICE_HASH_IP_CTX_IP) { + int ret = 0; + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ret = ice_add_rss_cfg_pre_ip(vf, &vf->hash_ctx.v4); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ret = ice_add_rss_cfg_pre_ip(vf, &vf->hash_ctx.v6); + + if (ret) + return ret; + } + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) { + return ice_add_rss_cfg_pre_gtpu(vf, &vf->hash_ctx.ipv4, + ice_gtpu_ctx_idx); + } else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) { + return ice_add_rss_cfg_pre_gtpu(vf, &vf->hash_ctx.ipv6, + ice_gtpu_ctx_idx); + } + + return 0; +} + +/** + * ice_add_rss_cfg_post_gtpu - Post-process GTPU RSS configuration + * @vf: pointer to the VF info + * @ctx: pointer to the context of the GTPU hash + * @cfg: pointer to the RSS hash configuration + * @ctx_idx: index of the hash context + * + * Post-processes the GTPU hash configuration after a new hash + * context has been successfully added. It updates the context with the new + * configuration and restores any previously removed hash contexts that need + * to be re-applied. This ensures proper TCAM rule ordering and avoids + * conflicts between overlapping GTPU rules. + * + * Return: 0 on success or a negative error code on failure + */ +static int ice_add_rss_cfg_post_gtpu(struct ice_vf *vf, + struct ice_vf_hash_gtpu_ctx *ctx, + struct ice_rss_hash_cfg *cfg, u32 ctx_idx) +{ + /* GTPU hash moveback lookup table indexed by context ID. + * Each entry is a bitmap indicating which contexts need moveback + * operations when the corresponding context index is processed. 
+ */ + static const unsigned long + ice_gtpu_moveback_tbl[ICE_HASH_GTPU_CTX_MAX] = { + [ICE_HASH_GTPU_CTX_EH_IP] = 0, + [ICE_HASH_GTPU_CTX_EH_IP_UDP] = + BIT(ICE_HASH_GTPU_CTX_UP_IP) | + BIT(ICE_HASH_GTPU_CTX_UP_IP_TCP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP_TCP), + [ICE_HASH_GTPU_CTX_EH_IP_TCP] = + BIT(ICE_HASH_GTPU_CTX_UP_IP) | + BIT(ICE_HASH_GTPU_CTX_UP_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP) | + BIT(ICE_HASH_GTPU_CTX_DW_IP_UDP), + [ICE_HASH_GTPU_CTX_UP_IP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_UP_IP_UDP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_UP_IP_TCP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_DW_IP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_DW_IP_UDP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + [ICE_HASH_GTPU_CTX_DW_IP_TCP] = + BIT(ICE_HASH_GTPU_CTX_EH_IP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_UDP) | + BIT(ICE_HASH_GTPU_CTX_EH_IP_TCP), + }; + unsigned long moveback_mask; + int ret; + int i; + + if (unlikely(ctx_idx >= ICE_HASH_GTPU_CTX_MAX)) + return 0; + + ctx->ctx[ctx_idx].addl_hdrs = cfg->addl_hdrs; + ctx->ctx[ctx_idx].hash_flds = cfg->hash_flds; + ctx->ctx[ctx_idx].hdr_type = cfg->hdr_type; + ctx->ctx[ctx_idx].symm = cfg->symm; + + moveback_mask = ice_gtpu_moveback_tbl[ctx_idx]; + for_each_set_bit(i, &moveback_mask, ICE_HASH_GTPU_CTX_MAX) { + ret = ice_hash_moveback(vf, &ctx->ctx[i]); + if (ret && ret != -ENOENT) + return ret; + } + + return 0; +} + +static int +ice_add_rss_cfg_post(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + u32 ice_gtpu_ctx_idx = ice_calc_gtpu_ctx_idx(cfg->addl_hdrs); + u8 ip_ctx_idx = 
ice_map_ip_ctx_idx(cfg->addl_hdrs); + + if (ip_ctx_idx && ip_ctx_idx < ICE_HASH_IP_CTX_MAX) { + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ice_hash_cfg_record(&vf->hash_ctx.v4.ctx[ip_ctx_idx], cfg); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ice_hash_cfg_record(&vf->hash_ctx.v6.ctx[ip_ctx_idx], cfg); + } + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) { + return ice_add_rss_cfg_post_gtpu(vf, &vf->hash_ctx.ipv4, + cfg, ice_gtpu_ctx_idx); + } else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) { + return ice_add_rss_cfg_post_gtpu(vf, &vf->hash_ctx.ipv6, + cfg, ice_gtpu_ctx_idx); + } + + return 0; +} + +/** + * ice_rem_rss_cfg_post - post-process the RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * Post process the RSS hash configuration after deleting a hash + * config. For example, it resets the hash context for the GTPU hash. + */ +static void +ice_rem_rss_cfg_post(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + u32 ice_gtpu_ctx_idx = ice_calc_gtpu_ctx_idx(cfg->addl_hdrs); + u8 ip_ctx_idx = ice_map_ip_ctx_idx(cfg->addl_hdrs); + + if (ip_ctx_idx && ip_ctx_idx < ICE_HASH_IP_CTX_MAX) { + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ice_hash_cfg_reset(&vf->hash_ctx.v4.ctx[ip_ctx_idx]); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ice_hash_cfg_reset(&vf->hash_ctx.v6.ctx[ip_ctx_idx]); + } + + if (ice_gtpu_ctx_idx >= ICE_HASH_GTPU_CTX_MAX) + return; + + if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV4) + ice_hash_cfg_reset(&vf->hash_ctx.ipv4.ctx[ice_gtpu_ctx_idx]); + else if (cfg->addl_hdrs & ICE_FLOW_SEG_HDR_IPV6) + ice_hash_cfg_reset(&vf->hash_ctx.ipv6.ctx[ice_gtpu_ctx_idx]); +} + +/** + * ice_rem_rss_cfg_wrap - Wrapper for deleting an RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * Wrapper function to delete a flow profile based on an RSS configuration, + * and also post process the hash context based on the rollback mechanism + * which
handles some rule conflicts in ice_add_rss_cfg_wrap. + * + * Return: 0 on success; negative error code on failure. + */ +static int +ice_rem_rss_cfg_wrap(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + ret = ice_rem_rss_cfg(hw, vsi->idx, cfg); + /* We just ignore -ENOENT, because if two configurations share the same + * profile, removing one of them actually removes both, since the + * profile is deleted. + */ + if (ret && ret != -ENOENT) { + dev_err(dev, "ice_rem_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return ret; + } + + ice_rem_rss_cfg_post(vf, cfg); + + return 0; +} + +/** + * ice_add_rss_cfg_wrap - Wrapper for adding an RSS configuration + * @vf: pointer to the VF info + * @cfg: pointer to the RSS hash configuration + * + * Add a flow profile based on an RSS configuration. Use a rollback + * mechanism to handle rule conflicts due to TCAM + * write sequence from top to bottom. + * + * Return: 0 on success; negative error code on failure.
+ */ +static int +ice_add_rss_cfg_wrap(struct ice_vf *vf, struct ice_rss_hash_cfg *cfg) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + int ret; + + if (ice_add_rss_cfg_pre(vf, cfg)) + return -EINVAL; + + ret = ice_add_rss_cfg(hw, vsi, cfg); + if (ret) { + dev_err(dev, "ice_add_rss_cfg failed for VF %d, VSI %d, error:%d\n", + vf->vf_id, vf->lan_vsi_idx, ret); + return ret; + } + + if (ice_add_rss_cfg_post(vf, cfg)) + ret = -EINVAL; + + return ret; +} + +/** + * ice_parse_raw_rss_pattern - Parse raw pattern spec and mask for RSS + * @vf: pointer to the VF info + * @proto: pointer to the virtchnl protocol header + * @raw_cfg: pointer to the RSS raw pattern configuration + * + * Parser function to get spec and mask from virtchnl message, and parse + * them to get the corresponding profile and offset. The profile is used + * to add RSS configuration. + * + * Return: 0 on success; negative error code on failure. 
+ */ +static int +ice_parse_raw_rss_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto, + struct ice_rss_raw_cfg *raw_cfg) +{ + struct ice_parser_result pkt_parsed; + struct ice_hw *hw = &vf->pf->hw; + struct ice_parser_profile prof; + struct ice_parser *psr; + u8 *pkt_buf, *msk_buf; + u16 pkt_len; + int ret = 0; + + pkt_len = proto->raw.pkt_len; + if (!pkt_len) + return -EINVAL; + if (pkt_len > VIRTCHNL_MAX_SIZE_RAW_PACKET) + pkt_len = VIRTCHNL_MAX_SIZE_RAW_PACKET; + + pkt_buf = kzalloc(pkt_len, GFP_KERNEL); + msk_buf = kzalloc(pkt_len, GFP_KERNEL); + if (!pkt_buf || !msk_buf) { + ret = -ENOMEM; + goto free_alloc; + } + + memcpy(pkt_buf, proto->raw.spec, pkt_len); + memcpy(msk_buf, proto->raw.mask, pkt_len); + + psr = ice_parser_create(hw); + if (IS_ERR(psr)) { + ret = PTR_ERR(psr); + goto free_alloc; + } + + ret = ice_parser_run(psr, pkt_buf, pkt_len, &pkt_parsed); + if (ret) + goto parser_destroy; + + ret = ice_parser_profile_init(&pkt_parsed, pkt_buf, msk_buf, + pkt_len, ICE_BLK_RSS, &prof); + if (ret) + goto parser_destroy; + + memcpy(&raw_cfg->prof, &prof, sizeof(prof)); + +parser_destroy: + ice_parser_destroy(psr); +free_alloc: + kfree(pkt_buf); + kfree(msk_buf); + return ret; +} + +/** + * ice_add_raw_rss_cfg - add RSS configuration for raw pattern + * @vf: pointer to the VF info + * @cfg: pointer to the RSS raw pattern configuration + * + * This function adds the RSS configuration for raw pattern. + * Check if current profile is matched. If not, remove the old + * one and add the new profile to HW directly. Update the symmetric + * hash configuration as well. + * + * Return: 0 on success; negative error code on failure. 
+ */ +static int +ice_add_raw_rss_cfg(struct ice_vf *vf, struct ice_rss_raw_cfg *cfg) +{ + struct ice_parser_profile *prof = &cfg->prof; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_rss_prof_info *rss_prof; + struct ice_hw *hw = &vf->pf->hw; + int i, ptg, ret = 0; + u16 vsi_handle; + u64 id; + + vsi_handle = vf->lan_vsi_idx; + id = find_first_bit(prof->ptypes, ICE_FLOW_PTYPE_MAX); + + ptg = hw->blk[ICE_BLK_RSS].xlt1.t[id]; + rss_prof = &vf->rss_prof_info[ptg]; + + /* check if ptg already has a profile */ + if (rss_prof->prof.fv_num) { + for (i = 0; i < ICE_MAX_FV_WORDS; i++) { + if (rss_prof->prof.fv[i].proto_id != + prof->fv[i].proto_id || + rss_prof->prof.fv[i].offset != + prof->fv[i].offset) + break; + } + + /* current profile is matched, check symmetric hash */ + if (i == ICE_MAX_FV_WORDS) { + if (rss_prof->symm != cfg->symm) + goto update_symm; + return ret; + } + + /* current profile is not matched, remove it */ + ret = + ice_rem_prof_id_flow(hw, ICE_BLK_RSS, + ice_get_hw_vsi_num(hw, vsi_handle), + id); + if (ret) { + dev_err(dev, "remove RSS flow failed\n"); + return ret; + } + + ret = ice_rem_prof(hw, ICE_BLK_RSS, id); + if (ret) { + dev_err(dev, "remove RSS profile failed\n"); + return ret; + } + } + + /* add new profile */ + ret = ice_flow_set_parser_prof(hw, vsi_handle, 0, prof, ICE_BLK_RSS); + if (ret) { + dev_err(dev, "HW profile add failed\n"); + return ret; + } + + memcpy(&rss_prof->prof, prof, sizeof(struct ice_parser_profile)); + +update_symm: + rss_prof->symm = cfg->symm; + ice_rss_update_raw_symm(hw, cfg, id); + return ret; +} + +/** + * ice_rem_raw_rss_cfg - remove RSS configuration for raw pattern + * @vf: pointer to the VF info + * @cfg: pointer to the RSS raw pattern configuration + * + * This function removes the RSS configuration for raw pattern. + * Check if vsi group is already removed first. If not, remove the + * profile. + * + * Return: 0 on success; negative error code on failure. 
+ */ +static int +ice_rem_raw_rss_cfg(struct ice_vf *vf, struct ice_rss_raw_cfg *cfg) +{ + struct ice_parser_profile *prof = &cfg->prof; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_hw *hw = &vf->pf->hw; + int ptg, ret = 0; + u16 vsig, vsi; + u64 id; + + id = find_first_bit(prof->ptypes, ICE_FLOW_PTYPE_MAX); + + ptg = hw->blk[ICE_BLK_RSS].xlt1.t[id]; + + memset(&vf->rss_prof_info[ptg], 0, + sizeof(struct ice_rss_prof_info)); + + /* check if vsig is already removed */ + vsi = ice_get_hw_vsi_num(hw, vf->lan_vsi_idx); + if (vsi >= ICE_MAX_VSI) { + ret = -EINVAL; + goto err; + } + + vsig = hw->blk[ICE_BLK_RSS].xlt2.vsis[vsi].vsig; + if (vsig) { + ret = ice_rem_prof_id_flow(hw, ICE_BLK_RSS, vsi, id); + if (ret) + goto err; + + ret = ice_rem_prof(hw, ICE_BLK_RSS, id); + if (ret) + goto err; + } + + return ret; + +err: + dev_err(dev, "HW profile remove failed\n"); + return ret; +} + /** * ice_vc_handle_rss_cfg * @vf: pointer to the VF info @@ -569,6 +1569,9 @@ int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) struct device *dev = ice_pf_to_dev(vf->pf); struct ice_hw *hw = &vf->pf->hw; struct ice_vsi *vsi; + u8 hash_type; + bool symm; + int ret; if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", @@ -604,49 +1607,44 @@ int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) goto error_param; } - if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { + hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + + ret = ice_vc_rss_hash_update(hw, vsi, hash_type); + if (ret) + v_ret = ice_err_to_virt_err(ret); goto error_param; } - if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { - struct ice_vsi_ctx *ctx; - u8 lut_type, hash_type; - int status; + hash_type = add ? 
ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + ret = ice_vc_rss_hash_update(hw, vsi, hash_type); + if (ret) { + v_ret = ice_err_to_virt_err(ret); + goto error_param; + } - lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : - ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + symm = rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC; + /* Configure RSS hash for raw pattern */ + if (rss_cfg->proto_hdrs.tunnel_level == 0 && + rss_cfg->proto_hdrs.count == 0) { + struct ice_rss_raw_cfg raw_cfg; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + if (ice_parse_raw_rss_pattern(vf, &rss_cfg->proto_hdrs, + &raw_cfg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - ctx->info.q_opt_rss = - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); - - /* Preserve existing queueing option setting */ - ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & - ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); - ctx->info.q_opt_tc = vsi->info.q_opt_tc; - ctx->info.q_opt_flags = vsi->info.q_opt_rss; - - ctx->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); - - status = ice_update_vsi(hw, vsi->idx, ctx, NULL); - if (status) { - dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n", - status, libie_aq_str(hw->adminq.sq_last_status)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (add) { + raw_cfg.symm = symm; + if (ice_add_raw_rss_cfg(vf, &raw_cfg)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; } else { - vsi->info.q_opt_rss = ctx->info.q_opt_rss; + if (ice_rem_raw_rss_cfg(vf, &raw_cfg)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; } - - kfree(ctx); } else { struct ice_rss_hash_cfg cfg; @@ -665,24 +1663,12 @@ int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) } if (add) { - if (ice_add_rss_cfg(hw, vsi, &cfg)) { + cfg.symm = symm; + if (ice_add_rss_cfg_wrap(vf, &cfg)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, 
"ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", - vsi->vsi_num, v_ret); - } } else { - int status; - - status = ice_rem_rss_cfg(hw, vsi->idx, &cfg); - /* We just ignore -ENOENT, because if two configurations - * share the same profile remove one of them actually - * removes both, since the profile is deleted. - */ - if (status && status != -ENOENT) { + if (ice_rem_rss_cfg_wrap(vf, &cfg)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n", - vf->vf_id, status); - } } } From f89e4e1512333fcb92474e0e8487929b352c50de Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 30 Oct 2025 14:59:48 +0100 Subject: [PATCH 629/867] ice: improve TCAM priority handling for RSS profiles Enhance TCAM priority logic to avoid conflicts between RSS profiles with overlapping PTGs and attributes. Track used PTG and attribute combinations. Ensure higher-priority profiles override lower ones. Add helper for setting TCAM flags and masks. Ensure RSS rule consistency and prevent unintended matches. 
Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_flex_pipe.c | 91 ++++++++++++++++--- .../net/ethernet/intel/ice/ice_flex_type.h | 1 + 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index c8cb492fddf46..c0dbec3693669 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -3577,6 +3577,19 @@ ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig, return 0; } +/** + * ice_set_tcam_flags - set TCAM flag don't care mask + * @mask: mask for flags + * @dc_mask: pointer to the don't care mask + */ +static void ice_set_tcam_flags(u16 mask, u8 dc_mask[ICE_TCAM_KEY_VAL_SZ]) +{ + u16 inverted_mask = ~mask; + + /* flags are lowest u16 */ + put_unaligned_le16(inverted_mask, dc_mask); +} + /** * ice_rem_chg_tcam_ent - remove a specific TCAM entry from change list * @hw: pointer to the HW struct @@ -3647,6 +3660,9 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, if (!p) return -ENOMEM; + /* set don't care masks for TCAM flags */ + ice_set_tcam_flags(tcam->attr.mask, dc_msk); + status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id, tcam->ptg, vsig, 0, tcam->attr.flags, vl_msk, dc_msk, nm_msk); @@ -3672,6 +3688,34 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, return status; } +/** + * ice_ptg_attr_in_use - determine if PTG and attribute pair is in use + * @ptg_attr: pointer to the PTG and attribute pair to check + * @ptgs_used: bitmap that denotes which PTGs are in use + * @attr_used: array of PTG and attributes pairs already used + * @attr_cnt: count of entries in the attr_used array + * + * Return: true if the PTG and attribute pair is in use, false otherwise. 
+ */ +static bool +ice_ptg_attr_in_use(struct ice_tcam_inf *ptg_attr, unsigned long *ptgs_used, + struct ice_tcam_inf *attr_used[], u16 attr_cnt) +{ + u16 i; + + if (!test_bit(ptg_attr->ptg, ptgs_used)) + return false; + + /* the PTG is used, so now look for correct attributes */ + for (i = 0; i < attr_cnt; i++) + if (attr_used[i]->ptg == ptg_attr->ptg && + attr_used[i]->attr.flags == ptg_attr->attr.flags && + attr_used[i]->attr.mask == ptg_attr->attr.mask) + return true; + + return false; +} + /** * ice_adj_prof_priorities - adjust profile based on priorities * @hw: pointer to the HW struct @@ -3684,10 +3728,16 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, struct list_head *chg) { DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); + struct ice_tcam_inf **attr_used; struct ice_vsig_prof *t; - int status; + u16 attr_used_cnt = 0; + int status = 0; u16 idx; + attr_used = kcalloc(ICE_MAX_PTG_ATTRS, sizeof(*attr_used), GFP_KERNEL); + if (!attr_used) + return -ENOMEM; + bitmap_zero(ptgs_used, ICE_XLT1_CNT); idx = vsig & ICE_VSIG_IDX_M; @@ -3705,11 +3755,15 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 i; for (i = 0; i < t->tcam_count; i++) { + bool used; + /* Scan the priorities from newest to oldest. * Make sure that the newest profiles take priority. 
*/ - if (test_bit(t->tcam[i].ptg, ptgs_used) && - t->tcam[i].in_use) { + used = ice_ptg_attr_in_use(&t->tcam[i], ptgs_used, + attr_used, attr_used_cnt); + + if (used && t->tcam[i].in_use) { /* need to mark this PTG as never match, as it * was already in use and therefore duplicate * (and lower priority) @@ -3719,9 +3773,8 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, &t->tcam[i], chg); if (status) - return status; - } else if (!test_bit(t->tcam[i].ptg, ptgs_used) && - !t->tcam[i].in_use) { + goto free_attr_used; + } else if (!used && !t->tcam[i].in_use) { /* need to enable this PTG, as it in not in use * and not enabled (highest priority) */ @@ -3730,15 +3783,21 @@ ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, &t->tcam[i], chg); if (status) - return status; + goto free_attr_used; } /* keep track of used ptgs */ - __set_bit(t->tcam[i].ptg, ptgs_used); + set_bit(t->tcam[i].ptg, ptgs_used); + if (attr_used_cnt < ICE_MAX_PTG_ATTRS) + attr_used[attr_used_cnt++] = &t->tcam[i]; + else + ice_debug(hw, ICE_DBG_INIT, "Warn: ICE_MAX_PTG_ATTRS exceeded\n"); } } - return 0; +free_attr_used: + kfree(attr_used); + return status; } /** @@ -3821,11 +3880,15 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, p->vsig = vsig; p->tcam_idx = t->tcam[i].tcam_idx; + /* set don't care masks for TCAM flags */ + ice_set_tcam_flags(t->tcam[i].attr.mask, dc_msk); + /* write the TCAM entry */ status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx, t->tcam[i].prof_id, - t->tcam[i].ptg, vsig, 0, 0, - vl_msk, dc_msk, nm_msk); + t->tcam[i].ptg, vsig, 0, + t->tcam[i].attr.flags, vl_msk, + dc_msk, nm_msk); if (status) { devm_kfree(ice_hw_to_dev(hw), p); goto err_ice_add_prof_id_vsig; @@ -4139,9 +4202,6 @@ ice_flow_assoc_fdir_prof(struct ice_hw *hw, enum ice_block blk, u16 vsi_num; int status; - if (blk != ICE_BLK_FD) - return -EINVAL; - vsi_num = ice_get_hw_vsi_num(hw, dest_vsi); status = 
ice_add_prof_id_flow(hw, blk, vsi_num, hdl); if (status) { @@ -4150,6 +4210,9 @@ ice_flow_assoc_fdir_prof(struct ice_hw *hw, enum ice_block blk, return status; } + if (blk != ICE_BLK_FD) + return 0; + vsi_num = ice_get_hw_vsi_num(hw, fdir_vsi); status = ice_add_prof_id_flow(hw, blk, vsi_num, hdl); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 817beca591e0c..80c9e7c749c22 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -187,6 +187,7 @@ struct ice_prof_map { }; #define ICE_INVALID_TCAM 0xFFFF +#define ICE_MAX_PTG_ATTRS 1024 struct ice_tcam_inf { u16 tcam_idx; From 41e880eb8482499f0acb6371b30fe7d8dd44eb8b Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Thu, 30 Oct 2025 14:59:49 +0100 Subject: [PATCH 630/867] ice: Extend PTYPE bitmap coverage for GTP encapsulated flows Consolidate updates to the Protocol Type (PTYPE) bitmap definitions across multiple flow types in the Intel ICE driver to support GTP (GPRS Tunneling Protocol) encapsulated traffic. Enable improved Receive Side Scaling (RSS) configuration for both user and control plane GTP flows. Cover a wide range of protocol and encapsulation scenarios, including: - MAC OFOS and IL - IPv4 and IPv6 (OFOS, IL, ALL, no-L4) - TCP, SCTP, ICMP - GRE OF - GTPC (control plane) Expand the PTYPE bitmap entries to improve classification and distribution of GTP traffic across multiple queues, enhancing performance and scalability in mobile network environments. 
Co-developed-by: Dan Nowlin Signed-off-by: Dan Nowlin Co-developed-by: Qi Zhang Signed-off-by: Qi Zhang Co-developed-by: Jie Wang Signed-off-by: Jie Wang Co-developed-by: Junfeng Guo Signed-off-by: Junfeng Guo Signed-off-by: Przemek Kitszel Reviewed-by: Jedrzej Jagielski Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flow.c | 52 +++++++++++------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index a2f2a612428d5..c9b6d0a84bd16 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -220,9 +220,9 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { */ static const u32 ice_ptypes_mac_ofos[] = { 0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB, - 0x0000077E, 0x00000000, 0x00000000, 0x00000000, - 0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000077E, 0x000003FF, 0x00000000, 0x00000000, + 0x00400000, 0x03FFF000, 0xFFFFFFE0, 0x00000707, + 0xFFFFF000, 0x000003FF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -245,10 +245,10 @@ static const u32 ice_ptypes_macvlan_il[] = { * include IPv4 other PTYPEs */ static const u32 ice_ptypes_ipv4_ofos[] = { - 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x1D800000, 0xBFBF7800, 0x000001DF, 0x00000000, 0x00000000, 0x00000155, 0x00000000, 0x00000000, - 0x00000000, 0x000FC000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x000FC000, 0x000002A0, 0x00000000, + 0x00015000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -259,10 
+259,10 @@ static const u32 ice_ptypes_ipv4_ofos[] = { * IPv4 other PTYPEs */ static const u32 ice_ptypes_ipv4_ofos_all[] = { - 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x1D800000, 0x27BF7800, 0x00000000, 0x00000000, 0x00000000, 0x00000155, 0x00000000, 0x00000000, - 0x00000000, 0x000FC000, 0x83E0F800, 0x00000101, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x000FC000, 0x83E0FAA0, 0x00000101, + 0x3FFD5000, 0x00000000, 0x02FBEFBC, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -274,7 +274,7 @@ static const u32 ice_ptypes_ipv4_il[] = { 0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B, 0x0000000E, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x001FF800, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xC0FC0000, 0x0000000F, 0xBC0BC0BC, 0x00000BC0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -285,10 +285,10 @@ static const u32 ice_ptypes_ipv4_il[] = { * include IPv6 other PTYPEs */ static const u32 ice_ptypes_ipv6_ofos[] = { - 0x00000000, 0x00000000, 0x77000000, 0x10002000, + 0x00000000, 0x00000000, 0x76000000, 0x10002000, 0x00000000, 0x000002AA, 0x00000000, 0x00000000, - 0x00000000, 0x03F00000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03F00000, 0x00000540, 0x00000000, + 0x0002A000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -299,10 +299,10 @@ static const u32 ice_ptypes_ipv6_ofos[] = { * IPv6 other PTYPEs */ static const u32 ice_ptypes_ipv6_ofos_all[] = { - 0x00000000, 0x00000000, 0x77000000, 0x10002000, - 0x00000000, 0x000002AA, 0x00000000, 0x00000000, - 0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206, - 0x00000000, 
0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x76000000, 0xFEFDE000, + 0x0000077E, 0x000002AA, 0x00000000, 0x00000000, + 0x00000000, 0x03F00000, 0x7C1F0540, 0x00000206, + 0xC002A000, 0x000003FF, 0xBC000000, 0x0002FBEF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -314,7 +314,7 @@ static const u32 ice_ptypes_ipv6_il[] = { 0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000, 0x00000770, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x7FE00000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x3F000000, 0x000003F0, 0x02F02F00, 0x0002F02F, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -387,8 +387,8 @@ static const u32 ice_ptypes_ipv6_il_no_l4[] = { static const u32 ice_ptypes_udp_il[] = { 0x81000000, 0x20204040, 0x04000010, 0x80810102, 0x00000040, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00410000, 0x90842000, 0x00000007, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00410000, 0x908427E0, 0x00000007, + 0x0413F000, 0x00000041, 0x10410410, 0x00004104, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -400,7 +400,7 @@ static const u32 ice_ptypes_tcp_il[] = { 0x04000000, 0x80810102, 0x10000040, 0x02040408, 0x00000102, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00820000, 0x21084000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x08200000, 0x00000082, 0x20820820, 0x00008208, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -412,7 +412,7 @@ static const u32 ice_ptypes_sctp_il[] = { 0x08000000, 0x01020204, 0x20000081, 0x04080810, 0x00000204, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x01040000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x10400000, 0x00000104, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -436,7 +436,7 @@ static const u32 ice_ptypes_icmp_il[] = { 0x00000000, 0x02040408, 0x40000102, 0x08101020, 0x00000408, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x42108000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x20800000, 0x00000208, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -448,7 +448,7 @@ static const u32 ice_ptypes_gre_of[] = { 0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000, 0x0000017E, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xBEFBEFBC, 0x0002FBEF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -457,7 +457,7 @@ static const u32 ice_ptypes_gre_of[] = { /* Packet types for packets with an Innermost/Last MAC header */ static const u32 ice_ptypes_mac_il[] = { - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x20000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -471,7 +471,7 @@ static const u32 ice_ptypes_mac_il[] = { static const u32 ice_ptypes_gtpc[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000180, 0x00000000, + 0x00000000, 0x00000000, 0x000001E0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, From 3da28eb277c1c175df1aa048e404cc765ae04327 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 30 Oct 2025 14:59:50 +0100 Subject: [PATCH 631/867] iavf: add RSS support for GTP protocol via ethtool Extend the iavf driver to support Receive Side Scaling (RSS) configuration for GTP (GPRS Tunneling Protocol) flows using ethtool. The implementation introduces new RSS flow segment headers and hash field definitions for various GTP encapsulations, including: - GTPC - GTPU (IP, Extension Header, Uplink, Downlink) - TEID-based hashing The ethtool interface is updated to parse and apply these new flow types and hash fields, enabling fine-grained traffic distribution for GTP-based mobile workloads. This enhancement improves performance and scalability for virtualized network functions (VNFs) and user plane functions (UPFs) in 5G and LTE deployments. Reviewed-by: Jedrzej Jagielski Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/iavf/iavf_adv_rss.c | 119 ++++++++++++++---- .../net/ethernet/intel/iavf/iavf_adv_rss.h | 31 +++++ .../net/ethernet/intel/iavf/iavf_ethtool.c | 89 +++++++++++++ 3 files changed, 216 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c index a9e1da35e2489..4d12dfe1b4819 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c +++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c @@ -90,6 +90,55 @@ iavf_fill_adv_rss_sctp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds) VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT); } +/** + * iavf_fill_adv_rss_gtp_hdr - Fill GTP-related RSS protocol headers + * @proto_hdrs: pointer to the virtchnl protocol headers structure to populate + * @packet_hdrs: bitmask of packet header types to configure + * @hash_flds: RSS hash field configuration + * + * This function populates the virtchnl protocol header structure 
with + * appropriate GTP-related header types based on the specified packet_hdrs. + * It supports GTPC, GTPU with extension headers, and uplink/downlink PDU + * types. For certain GTPU types, it also appends an IPv4 header to enable + * hashing on the destination IP address. + * + * Return: 0 on success or -EOPNOTSUPP if the packet_hdrs value is unsupported. + */ +static int +iavf_fill_adv_rss_gtp_hdr(struct virtchnl_proto_hdrs *proto_hdrs, + u32 packet_hdrs, u64 hash_flds) +{ + struct virtchnl_proto_hdr *hdr; + + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1]; + + switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_GTP) { + case IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC_TEID: + case IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC: + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPC); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_EH: + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_UP: + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH_PDU_UP); + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + iavf_fill_adv_rss_ip4_hdr(hdr, IAVF_ADV_RSS_HASH_FLD_IPV4_DA); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_DWN: + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH_PDU_DWN); + fallthrough; + case IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_IP: + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + iavf_fill_adv_rss_ip4_hdr(hdr, IAVF_ADV_RSS_HASH_FLD_IPV4_DA); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + /** * iavf_fill_adv_rss_cfg_msg - fill the RSS configuration into virtchnl message * @rss_cfg: the virtchnl message to be filled with RSS configuration setting @@ -103,6 +152,8 @@ int iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg, u32 packet_hdrs, u64 hash_flds, bool symm) { + const u32 packet_l3_hdrs = packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L3; + const u32 packet_l4_hdrs = packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L4; struct virtchnl_proto_hdrs *proto_hdrs = &rss_cfg->proto_hdrs; struct virtchnl_proto_hdr *hdr; @@ -113,31 +164,41 @@ 
iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg, proto_hdrs->tunnel_level = 0; /* always outer layer */ - hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; - switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L3) { - case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4: - iavf_fill_adv_rss_ip4_hdr(hdr, hash_flds); - break; - case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6: - iavf_fill_adv_rss_ip6_hdr(hdr, hash_flds); - break; - default: - return -EINVAL; + if (packet_l3_hdrs) { + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + switch (packet_l3_hdrs) { + case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4: + iavf_fill_adv_rss_ip4_hdr(hdr, hash_flds); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6: + iavf_fill_adv_rss_ip6_hdr(hdr, hash_flds); + break; + default: + return -EINVAL; + } } - hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; - switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L4) { - case IAVF_ADV_RSS_FLOW_SEG_HDR_TCP: - iavf_fill_adv_rss_tcp_hdr(hdr, hash_flds); - break; - case IAVF_ADV_RSS_FLOW_SEG_HDR_UDP: - iavf_fill_adv_rss_udp_hdr(hdr, hash_flds); - break; - case IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP: - iavf_fill_adv_rss_sctp_hdr(hdr, hash_flds); - break; - default: - return -EINVAL; + if (packet_l4_hdrs) { + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + switch (packet_l4_hdrs) { + case IAVF_ADV_RSS_FLOW_SEG_HDR_TCP: + iavf_fill_adv_rss_tcp_hdr(hdr, hash_flds); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_UDP: + iavf_fill_adv_rss_udp_hdr(hdr, hash_flds); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP: + iavf_fill_adv_rss_sctp_hdr(hdr, hash_flds); + break; + default: + return -EINVAL; + } + } + + if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_GTP) { + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + if (iavf_fill_adv_rss_gtp_hdr(proto_hdrs, packet_hdrs, hash_flds)) + return -EINVAL; } return 0; @@ -186,6 +247,8 @@ iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss, proto = "UDP"; else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP) proto = "SCTP"; + else 
if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_GTP) + proto = "GTP"; else return; @@ -211,6 +274,16 @@ iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss, IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT | IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT)) strcat(hash_opt, "dst port,"); + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_GTPC_TEID) + strcat(hash_opt, "gtp-c,"); + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_GTPU_IP_TEID) + strcat(hash_opt, "gtp-u ip,"); + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_GTPU_EH_TEID) + strcat(hash_opt, "gtp-u ext,"); + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_GTPU_UP_TEID) + strcat(hash_opt, "gtp-u ul,"); + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_GTPU_DWN_TEID) + strcat(hash_opt, "gtp-u dl,"); if (!action) action = ""; diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h index e31eb2afebeab..74cc9e0d528c8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h +++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h @@ -22,6 +22,12 @@ enum iavf_adv_rss_flow_seg_hdr { IAVF_ADV_RSS_FLOW_SEG_HDR_TCP = 0x00000004, IAVF_ADV_RSS_FLOW_SEG_HDR_UDP = 0x00000008, IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP = 0x00000010, + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC = 0x00000400, + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC_TEID = 0x00000800, + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_IP = 0x00001000, + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_EH = 0x00002000, + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_DWN = 0x00004000, + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_UP = 0x00008000, }; #define IAVF_ADV_RSS_FLOW_SEG_HDR_L3 \ @@ -33,6 +39,14 @@ enum iavf_adv_rss_flow_seg_hdr { IAVF_ADV_RSS_FLOW_SEG_HDR_UDP | \ IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP) +#define IAVF_ADV_RSS_FLOW_SEG_HDR_GTP \ + (IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC_TEID | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_IP | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_EH | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_DWN | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_UP) + enum iavf_adv_rss_flow_field { /* L3 */ 
IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA, @@ -46,6 +60,17 @@ enum iavf_adv_rss_flow_field { IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT, IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT, IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT, + /* GTPC_TEID */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPC_TEID, + /* GTPU_IP */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_IP_TEID, + /* GTPU_EH */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_EH_TEID, + IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_EH_QFI, + /* GTPU_UP */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_UP_TEID, + /* GTPU_DWN */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_DWN_TEID, /* The total number of enums must not exceed 64 */ IAVF_ADV_RSS_FLOW_FIELD_IDX_MAX @@ -72,6 +97,12 @@ enum iavf_adv_rss_flow_field { BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT) #define IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT \ BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT) +#define IAVF_ADV_RSS_HASH_FLD_GTPC_TEID BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPC_TEID) +#define IAVF_ADV_RSS_HASH_FLD_GTPU_IP_TEID BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_IP_TEID) +#define IAVF_ADV_RSS_HASH_FLD_GTPU_EH_TEID BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_EH_TEID) +#define IAVF_ADV_RSS_HASH_FLD_GTPU_UP_TEID BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_UP_TEID) +#define IAVF_ADV_RSS_HASH_FLD_GTPU_DWN_TEID \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_GTPU_DWN_TEID) /* bookkeeping of advanced RSS configuration */ struct iavf_adv_rss { diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 05d72be3fe80c..a3f8ced232662 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1336,6 +1336,56 @@ static u32 iavf_adv_rss_parse_hdrs(const struct ethtool_rxfh_fields *cmd) hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP | IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; break; + case GTPU_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_IP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case GTPC_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC | + 
IAVF_ADV_RSS_FLOW_SEG_HDR_UDP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case GTPC_TEID_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC_TEID | + IAVF_ADV_RSS_FLOW_SEG_HDR_UDP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case GTPU_EH_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_EH | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case GTPU_UL_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_UP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case GTPU_DL_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_DWN | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case GTPU_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_IP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case GTPC_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case GTPC_TEID_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPC_TEID | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case GTPU_EH_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_EH | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case GTPU_UL_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_UP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case GTPU_DL_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_GTPU_DWN | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; default: break; } @@ -1353,6 +1403,12 @@ iavf_adv_rss_parse_hash_flds(const struct ethtool_rxfh_fields *cmd, bool symm) case TCP_V4_FLOW: case UDP_V4_FLOW: case SCTP_V4_FLOW: + case GTPU_V4_FLOW: + case GTPC_V4_FLOW: + case GTPC_TEID_V4_FLOW: + case GTPU_EH_V4_FLOW: + case GTPU_UL_V4_FLOW: + case GTPU_DL_V4_FLOW: if (cmd->data & RXH_IP_SRC) hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_SA; if (cmd->data & RXH_IP_DST) @@ -1361,6 +1417,12 @@ iavf_adv_rss_parse_hash_flds(const struct ethtool_rxfh_fields *cmd, bool symm) case TCP_V6_FLOW: case UDP_V6_FLOW: case SCTP_V6_FLOW: + case GTPU_V6_FLOW: + case GTPC_V6_FLOW: + case GTPC_TEID_V6_FLOW: + case GTPU_EH_V6_FLOW: + case GTPU_UL_V6_FLOW: + case GTPU_DL_V6_FLOW: if (cmd->data & RXH_IP_SRC) hfld |= 
IAVF_ADV_RSS_HASH_FLD_IPV6_SA; if (cmd->data & RXH_IP_DST) @@ -1382,6 +1444,7 @@ iavf_adv_rss_parse_hash_flds(const struct ethtool_rxfh_fields *cmd, bool symm) break; case UDP_V4_FLOW: case UDP_V6_FLOW: + case GTPC_V4_FLOW: if (cmd->data & RXH_L4_B_0_1) hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT; if (cmd->data & RXH_L4_B_2_3) @@ -1398,6 +1461,32 @@ iavf_adv_rss_parse_hash_flds(const struct ethtool_rxfh_fields *cmd, bool symm) break; } } + if (cmd->data & RXH_GTP_TEID) { + switch (cmd->flow_type) { + case GTPC_TEID_V4_FLOW: + case GTPC_TEID_V6_FLOW: + hfld |= IAVF_ADV_RSS_HASH_FLD_GTPC_TEID; + break; + case GTPU_V4_FLOW: + case GTPU_V6_FLOW: + hfld |= IAVF_ADV_RSS_HASH_FLD_GTPU_IP_TEID; + break; + case GTPU_EH_V4_FLOW: + case GTPU_EH_V6_FLOW: + hfld |= IAVF_ADV_RSS_HASH_FLD_GTPU_EH_TEID; + break; + case GTPU_UL_V4_FLOW: + case GTPU_UL_V6_FLOW: + hfld |= IAVF_ADV_RSS_HASH_FLD_GTPU_UP_TEID; + break; + case GTPU_DL_V4_FLOW: + case GTPU_DL_V6_FLOW: + hfld |= IAVF_ADV_RSS_HASH_FLD_GTPU_DWN_TEID; + break; + default: + break; + } + } return hfld; } From c04956cccb78cc233a20cc18f663689671f03c65 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 5 Nov 2025 10:01:12 -0800 Subject: [PATCH 632/867] tg3: extract GRXRINGS from .get_rxnfc Commit 84eaf4359c36 ("net: ethtool: add get_rx_ring_count callback to optimize RX ring queries") added specific support for GRXRINGS callback, simplifying .get_rxnfc. Remove the handling of GRXRINGS in .get_rxnfc() by moving it to the new .get_rx_ring_count(). Given that tg3_get_rxnfc() only handles ETHTOOL_GRXRINGS, then this function becomes useless now, and it is removed. This also fixes the behavior for devices without MSIX support. Previously, the function would return -EOPNOTSUPP, but now it correctly returns 1. The functionality remains the same: return the current queue count if the device is running, otherwise return the minimum of online CPUs and TG3_RSS_MAX_NUM_QS. 
Signed-off-by: Breno Leitao Reviewed-by: Michael Chan Link: https://patch.msgid.link/20251105-grxrings_v1-v1-1-54c2caafa1fd@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d78cafdb20949..fa58c3ffceb06 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12719,29 +12719,17 @@ static int tg3_get_sset_count(struct net_device *dev, int sset) } } -static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, - u32 *rules __always_unused) +static u32 tg3_get_rx_ring_count(struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); if (!tg3_flag(tp, SUPPORT_MSIX)) - return -EOPNOTSUPP; + return 1; - switch (info->cmd) { - case ETHTOOL_GRXRINGS: - if (netif_running(tp->dev)) - info->data = tp->rxq_cnt; - else { - info->data = num_online_cpus(); - if (info->data > TG3_RSS_MAX_NUM_QS) - info->data = TG3_RSS_MAX_NUM_QS; - } + if (netif_running(tp->dev)) + return tp->rxq_cnt; - return 0; - - default: - return -EOPNOTSUPP; - } + return min(num_online_cpus(), TG3_RSS_MAX_NUM_QS); } static u32 tg3_get_rxfh_indir_size(struct net_device *dev) @@ -14268,7 +14256,7 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_coalesce = tg3_get_coalesce, .set_coalesce = tg3_set_coalesce, .get_sset_count = tg3_get_sset_count, - .get_rxnfc = tg3_get_rxnfc, + .get_rx_ring_count = tg3_get_rx_ring_count, .get_rxfh_indir_size = tg3_get_rxfh_indir_size, .get_rxfh = tg3_get_rxfh, .set_rxfh = tg3_set_rxfh, From 8a25a2e34157d882032112e4194ccdfb29c499e8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 5 Nov 2025 22:31:26 +0900 Subject: [PATCH 633/867] net: phy: qt2025: Wait until PHY becomes ready Wait until a PHY becomes ready in the probe callback by using read_poll_timeout function. 
Reviewed-by: Andrew Lunn Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Signed-off-by: FUJITA Tomonori Link: https://patch.msgid.link/20251105133126.3221948-1-fujita.tomonori@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qt2025.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/qt2025.rs b/drivers/net/phy/qt2025.rs index 0b9400dcb4c10..aaaead6512a01 100644 --- a/drivers/net/phy/qt2025.rs +++ b/drivers/net/phy/qt2025.rs @@ -12,6 +12,7 @@ use kernel::c_str; use kernel::error::code; use kernel::firmware::Firmware; +use kernel::io::poll::read_poll_timeout; use kernel::net::phy::{ self, reg::{Mmd, C45}, @@ -19,6 +20,7 @@ use kernel::net::phy::{ }; use kernel::prelude::*; use kernel::sizes::{SZ_16K, SZ_8K}; +use kernel::time::Delta; kernel::module_phy_driver! { drivers: [PhyQT2025], @@ -93,7 +95,13 @@ impl Driver for PhyQT2025 { // The micro-controller will start running from SRAM. dev.write(C45::new(Mmd::PCS, 0xe854), 0x0040)?; - // TODO: sleep here until the hw becomes ready. + read_poll_timeout( + || dev.read(C45::new(Mmd::PCS, 0xd7fd)), + |val| *val != 0x00 && *val != 0x10, + Delta::from_millis(50), + Delta::from_secs(3), + )?; + Ok(()) } From 74d4432421a3e2669fbccc08c0f4fc2980bf0e39 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 5 Nov 2025 21:29:08 +0200 Subject: [PATCH 634/867] docs: netlink: Couple of intro-specs documentation fixes Fix typo "handul" to "handful" and remove outdated limitation stating only generic netlink is supported (we have netlink-raw). 
Reviewed-by: Carolina Jubran Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20251105192908.686458-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/netlink/intro-specs.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/userspace-api/netlink/intro-specs.rst b/Documentation/userspace-api/netlink/intro-specs.rst index a4435ae4628d4..e5ebc617754a3 100644 --- a/Documentation/userspace-api/netlink/intro-specs.rst +++ b/Documentation/userspace-api/netlink/intro-specs.rst @@ -13,10 +13,10 @@ Simple CLI Kernel comes with a simple CLI tool which should be useful when developing Netlink related code. The tool is implemented in Python and can use a YAML specification to issue Netlink requests -to the kernel. Only Generic Netlink is supported. +to the kernel. The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts -a handul of arguments, the most important ones are: +a handful of arguments, the most important ones are: - ``--spec`` - point to the spec file - ``--do $name`` / ``--dump $name`` - issue request ``$name`` From 24ab8efb9aea77764dd99d2bad41fd8991223013 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Oct 2025 22:20:55 +0100 Subject: [PATCH 635/867] xsk: Move NETDEV_XDP_ACT_ZC into generic header Move NETDEV_XDP_ACT_ZC into xdp_sock_drv.h header such that external code can reuse it, and rename it into more generic NETDEV_XDP_ACT_XSK. 
Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Maciej Fijalkowski Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251031212103.310683-7-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 4 ++++ net/xdp/xsk_buff_pool.c | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4f2d3268a6769..242e34f771cca 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -12,6 +12,10 @@ #define XDP_UMEM_MIN_CHUNK_SHIFT 11 #define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) +#define NETDEV_XDP_ACT_XSK (NETDEV_XDP_ACT_BASIC | \ + NETDEV_XDP_ACT_REDIRECT | \ + NETDEV_XDP_ACT_XSK_ZEROCOPY) + struct xsk_cb_desc { void *src; u8 off; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 00a4eddaa0cd6..51526034c42ac 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -155,10 +155,6 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool) } } -#define NETDEV_XDP_ACT_ZC (NETDEV_XDP_ACT_BASIC | \ - NETDEV_XDP_ACT_REDIRECT | \ - NETDEV_XDP_ACT_XSK_ZEROCOPY) - int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *netdev, u16 queue_id, u16 flags) { @@ -200,7 +196,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, /* For copy-mode, we are done. */ return 0; - if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) { + if ((netdev->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) { err = -EOPNOTSUPP; goto err_unreg_pool; } From 25e63e559c41b5caa2ad8f076eba90f9897c0a07 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Oct 2025 22:20:59 +0100 Subject: [PATCH 636/867] netkit: Document fast vs slowpath members via macros Instead of a comment, just use two cacheline groups to document the intent for members often accessed in fast or slow path. 
Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251031212103.310683-11-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/netkit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 492be60f2e70f..0a2fef7caccb9 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -16,17 +16,19 @@ #define DRV_NAME "netkit" struct netkit { - /* Needed in fast-path */ + __cacheline_group_begin(netkit_fastpath); struct net_device __rcu *peer; struct bpf_mprog_entry __rcu *active; enum netkit_action policy; enum netkit_scrub scrub; struct bpf_mprog_bundle bundle; + __cacheline_group_end(netkit_fastpath); - /* Needed in slow-path */ + __cacheline_group_begin(netkit_slowpath); enum netkit_mode mode; bool primary; u32 headroom; + __cacheline_group_end(netkit_slowpath); }; struct netkit_link { From e98d8792929df31ec51710773509f6fd2964ea81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:14 +0100 Subject: [PATCH 637/867] net: rswitch: Move definition of S4 gPTP offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The files rcar_gen4_ptp.{c,h} implement an abstraction of the gPTP support implemented together with different other IP blocks. The first device added which supported this was RSWITCH on R-Car S4. While doing so the RSWITCH R-Car S4 specific offset was added to the generic Gen4 gPTP header file. Move it to the RSWITCH driver to make it clear it only applies to this driver. 
Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20251104222420.882731-2-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rcar_gen4_ptp.h | 2 -- drivers/net/ethernet/renesas/rswitch_main.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h index f77e79e473572..536badd798cce 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h @@ -9,8 +9,6 @@ #include -#define RCAR_GEN4_GPTP_OFFSET_S4 0x00018000 - /* driver's definitions */ #define RCAR_GEN4_RXTSTAMP_ENABLED BIT(0) #define RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT BIT(1) diff --git a/drivers/net/ethernet/renesas/rswitch_main.c b/drivers/net/ethernet/renesas/rswitch_main.c index f21a814aa9d11..24ed33ac4bcd8 100644 --- a/drivers/net/ethernet/renesas/rswitch_main.c +++ b/drivers/net/ethernet/renesas/rswitch_main.c @@ -30,6 +30,8 @@ #include "rswitch.h" #include "rswitch_l2.h" +#define RSWITCH_GPTP_OFFSET_S4 0x00018000 + static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected) { u32 val; @@ -2175,7 +2177,7 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) if (IS_ERR(priv->addr)) return PTR_ERR(priv->addr); - priv->ptp_priv->addr = priv->addr + RCAR_GEN4_GPTP_OFFSET_S4; + priv->ptp_priv->addr = priv->addr + RSWITCH_GPTP_OFFSET_S4; ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); if (ret < 0) { From 50ab1c6becdec42ce7cc9b52f2ea67c6a81218f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:15 +0100 Subject: [PATCH 638/867] net: rcar_gen4_ptp: Move control fields to users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct rcar_gen4_ptp_private provides two fields for convenience of its users, tstamp_tx_ctrl and tstamp_rx_ctrl. 
These fields are not used by the rcar_gen4_ptp driver itself but only by the drivers using it. Upcoming work will enable the RAVB driver currently only supporting gPTP on pre-Gen4 SoCs to use the Gen4 implementation as well. To facilitate this the convenience of having these fields in struct rcar_gen4_ptp_private becomes a problem as the RAVB driver already has its own driver specific fields for the same thing. Move the fields from struct rcar_gen4_ptp_private to each driver using the Gen4 gPTP clocks own private data structures. There is no functional change. Signed-off-by: Niklas Söderlund Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251104222420.882731-3-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rcar_gen4_ptp.h | 2 -- drivers/net/ethernet/renesas/rswitch.h | 3 +++ drivers/net/ethernet/renesas/rswitch_main.c | 17 ++++++++--------- drivers/net/ethernet/renesas/rtsn.c | 17 ++++++++--------- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h index 536badd798cce..1a1e43add129b 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h @@ -23,8 +23,6 @@ struct rcar_gen4_ptp_private { struct ptp_clock *clock; struct ptp_clock_info info; spinlock_t lock; /* For multiple registers access */ - u32 tstamp_tx_ctrl; - u32 tstamp_rx_ctrl; s64 default_addend; bool initialized; }; diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index a1d4a877e5bd4..3b348ebf6742b 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -1063,6 +1063,9 @@ struct rswitch_private { bool etha_no_runtime_change; bool gwca_halt; struct net_device *offload_brdev; + + u32 tstamp_tx_ctrl; + u32 tstamp_rx_ctrl; }; bool is_rdev(const struct net_device *ndev); diff --git 
a/drivers/net/ethernet/renesas/rswitch_main.c b/drivers/net/ethernet/renesas/rswitch_main.c index 24ed33ac4bcd8..31aabc6fc462d 100644 --- a/drivers/net/ethernet/renesas/rswitch_main.c +++ b/drivers/net/ethernet/renesas/rswitch_main.c @@ -845,7 +845,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) if (!skb) goto out; - get_ts = rdev->priv->ptp_priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; + get_ts = rdev->priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; if (get_ts) { struct skb_shared_hwtstamps *shhwtstamps; struct timespec64 ts; @@ -1799,14 +1799,13 @@ static int rswitch_hwstamp_get(struct net_device *ndev, struct kernel_hwtstamp_config *config) { struct rswitch_device *rdev = netdev_priv(ndev); - struct rcar_gen4_ptp_private *ptp_priv; - - ptp_priv = rdev->priv->ptp_priv; + struct rswitch_private *priv = rdev->priv; config->flags = 0; - config->tx_type = ptp_priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : - HWTSTAMP_TX_OFF; - switch (ptp_priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { + config->tx_type = + priv->tstamp_tx_ctrl ? 
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + + switch (priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { case RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT: config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; @@ -1856,8 +1855,8 @@ static int rswitch_hwstamp_set(struct net_device *ndev, break; } - rdev->priv->ptp_priv->tstamp_tx_ctrl = tstamp_tx_ctrl; - rdev->priv->ptp_priv->tstamp_rx_ctrl = tstamp_rx_ctrl; + rdev->priv->tstamp_tx_ctrl = tstamp_tx_ctrl; + rdev->priv->tstamp_rx_ctrl = tstamp_rx_ctrl; return 0; } diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 15a043e85431f..958c198084728 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -62,6 +62,9 @@ struct rtsn_private { int tx_data_irq; int rx_data_irq; + + u32 tstamp_tx_ctrl; + u32 tstamp_rx_ctrl; }; static u32 rtsn_read(struct rtsn_private *priv, enum rtsn_reg reg) @@ -162,7 +165,7 @@ static int rtsn_rx(struct net_device *ndev, int budget) unsigned int i; bool get_ts; - get_ts = priv->ptp_priv->tstamp_rx_ctrl & + get_ts = priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; ndescriptors = priv->dirty_rx + priv->num_rx_ring - priv->cur_rx; @@ -1122,21 +1125,19 @@ static int rtsn_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) static int rtsn_hwtstamp_get(struct net_device *ndev, struct kernel_hwtstamp_config *config) { - struct rcar_gen4_ptp_private *ptp_priv; struct rtsn_private *priv; if (!netif_running(ndev)) return -ENODEV; priv = netdev_priv(ndev); - ptp_priv = priv->ptp_priv; config->flags = 0; config->tx_type = - ptp_priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + priv->tstamp_tx_ctrl ? 
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - switch (ptp_priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { + switch (priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { case RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT: config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; @@ -1155,7 +1156,6 @@ static int rtsn_hwtstamp_set(struct net_device *ndev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { - struct rcar_gen4_ptp_private *ptp_priv; struct rtsn_private *priv; u32 tstamp_rx_ctrl; u32 tstamp_tx_ctrl; @@ -1164,7 +1164,6 @@ static int rtsn_hwtstamp_set(struct net_device *ndev, return -ENODEV; priv = netdev_priv(ndev); - ptp_priv = priv->ptp_priv; if (config->flags) return -EINVAL; @@ -1195,8 +1194,8 @@ static int rtsn_hwtstamp_set(struct net_device *ndev, break; } - ptp_priv->tstamp_tx_ctrl = tstamp_tx_ctrl; - ptp_priv->tstamp_rx_ctrl = tstamp_rx_ctrl; + priv->tstamp_tx_ctrl = tstamp_tx_ctrl; + priv->tstamp_rx_ctrl = tstamp_rx_ctrl; return 0; } From b314e4f7a9d9ba7821cfac57e1bf3648bb27a289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:16 +0100 Subject: [PATCH 639/867] net: rswitch: Use common defines for time stamping control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of translating to/from driver specific flags for packet time stamp control use the common flags directly. This simplifies the driver as the translating code can be removed while at the same time making it clear the flags are not flags written to hardware registers. One thing to note is that the bit-wise and check in rswitch_rx() of RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT is replaced with a not set check of HWTSTAMP_FILTER_NONE. This is okay as the bit of device specific event replaced was set for all modes except HWTSTAMP_FILTER_NONE. 
Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20251104222420.882731-4-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.h | 4 +-- drivers/net/ethernet/renesas/rswitch_main.c | 32 ++++++--------------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 3b348ebf6742b..aa605304fed05 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -1064,8 +1064,8 @@ struct rswitch_private { bool gwca_halt; struct net_device *offload_brdev; - u32 tstamp_tx_ctrl; - u32 tstamp_rx_ctrl; + enum hwtstamp_tx_types tstamp_tx_ctrl; + enum hwtstamp_rx_filters tstamp_rx_ctrl; }; bool is_rdev(const struct net_device *ndev); diff --git a/drivers/net/ethernet/renesas/rswitch_main.c b/drivers/net/ethernet/renesas/rswitch_main.c index 31aabc6fc462d..e14b21148f27a 100644 --- a/drivers/net/ethernet/renesas/rswitch_main.c +++ b/drivers/net/ethernet/renesas/rswitch_main.c @@ -845,7 +845,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) if (!skb) goto out; - get_ts = rdev->priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; + get_ts = rdev->priv->tstamp_rx_ctrl != HWTSTAMP_FILTER_NONE; if (get_ts) { struct skb_shared_hwtstamps *shhwtstamps; struct timespec64 ts; @@ -1802,20 +1802,8 @@ static int rswitch_hwstamp_get(struct net_device *ndev, struct rswitch_private *priv = rdev->priv; config->flags = 0; - config->tx_type = - priv->tstamp_tx_ctrl ? 
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - - switch (priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { - case RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT: - config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - break; - case RCAR_GEN4_RXTSTAMP_TYPE_ALL: - config->rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - config->rx_filter = HWTSTAMP_FILTER_NONE; - break; - } + config->tx_type = priv->tstamp_tx_ctrl; + config->rx_filter = priv->tstamp_rx_ctrl; return 0; } @@ -1825,18 +1813,16 @@ static int rswitch_hwstamp_set(struct net_device *ndev, struct netlink_ext_ack *extack) { struct rswitch_device *rdev = netdev_priv(ndev); - u32 tstamp_rx_ctrl = RCAR_GEN4_RXTSTAMP_ENABLED; - u32 tstamp_tx_ctrl; + enum hwtstamp_rx_filters tstamp_rx_ctrl; + enum hwtstamp_tx_types tstamp_tx_ctrl; if (config->flags) return -EINVAL; switch (config->tx_type) { case HWTSTAMP_TX_OFF: - tstamp_tx_ctrl = 0; - break; case HWTSTAMP_TX_ON: - tstamp_tx_ctrl = RCAR_GEN4_TXTSTAMP_ENABLED; + tstamp_tx_ctrl = config->tx_type; break; default: return -ERANGE; @@ -1844,14 +1830,12 @@ static int rswitch_hwstamp_set(struct net_device *ndev, switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: - tstamp_rx_ctrl = 0; - break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - tstamp_rx_ctrl |= RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; + tstamp_rx_ctrl = config->rx_filter; break; default: config->rx_filter = HWTSTAMP_FILTER_ALL; - tstamp_rx_ctrl |= RCAR_GEN4_RXTSTAMP_TYPE_ALL; + tstamp_rx_ctrl = HWTSTAMP_FILTER_ALL; break; } From e43791f40b814d00b8e2be41f9d2bc2a97246d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:17 +0100 Subject: [PATCH 640/867] net: rtsn: Use common defines for time stamping control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of translating to/from driver specific flags for packet time stamp control use the common flags directly. 
This simplifies the driver as the translating code can be removed while at the same time making it clear the flags are not flags written to hardware registers. One thing to note is that the bit-wise and check in rtsn_rx() of RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT is replaced with a not set check of HWTSTAMP_FILTER_NONE. This is okay as the bit of device specific event replaced was set for all modes except HWTSTAMP_FILTER_NONE. Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20251104222420.882731-5-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rtsn.c | 36 +++++++---------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 958c198084728..fdb1e7b7fb066 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -165,8 +165,7 @@ static int rtsn_rx(struct net_device *ndev, int budget) unsigned int i; bool get_ts; - get_ts = priv->tstamp_rx_ctrl & - RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; + get_ts = priv->tstamp_rx_ctrl != HWTSTAMP_FILTER_NONE; ndescriptors = priv->dirty_rx + priv->num_rx_ring - priv->cur_rx; rx_packets = 0; @@ -1133,21 +1132,8 @@ static int rtsn_hwtstamp_get(struct net_device *ndev, priv = netdev_priv(ndev); config->flags = 0; - - config->tx_type = - priv->tstamp_tx_ctrl ? 
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - - switch (priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE) { - case RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT: - config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - break; - case RCAR_GEN4_RXTSTAMP_TYPE_ALL: - config->rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - config->rx_filter = HWTSTAMP_FILTER_NONE; - break; - } + config->tx_type = priv->tstamp_tx_ctrl; + config->rx_filter = priv->tstamp_rx_ctrl; return 0; } @@ -1156,9 +1142,9 @@ static int rtsn_hwtstamp_set(struct net_device *ndev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack) { + enum hwtstamp_rx_filters tstamp_rx_ctrl; + enum hwtstamp_tx_types tstamp_tx_ctrl; struct rtsn_private *priv; - u32 tstamp_rx_ctrl; - u32 tstamp_tx_ctrl; if (!netif_running(ndev)) return -ENODEV; @@ -1170,10 +1156,8 @@ static int rtsn_hwtstamp_set(struct net_device *ndev, switch (config->tx_type) { case HWTSTAMP_TX_OFF: - tstamp_tx_ctrl = 0; - break; case HWTSTAMP_TX_ON: - tstamp_tx_ctrl = RCAR_GEN4_TXTSTAMP_ENABLED; + tstamp_tx_ctrl = config->tx_type; break; default: return -ERANGE; @@ -1181,16 +1165,12 @@ static int rtsn_hwtstamp_set(struct net_device *ndev, switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: - tstamp_rx_ctrl = 0; - break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - tstamp_rx_ctrl = RCAR_GEN4_RXTSTAMP_ENABLED | - RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; + tstamp_rx_ctrl = config->rx_filter; break; default: config->rx_filter = HWTSTAMP_FILTER_ALL; - tstamp_rx_ctrl = RCAR_GEN4_RXTSTAMP_ENABLED | - RCAR_GEN4_RXTSTAMP_TYPE_ALL; + tstamp_rx_ctrl = HWTSTAMP_FILTER_ALL; break; } From 3614d249d1da83b4806e015740117ed1069e3ca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:18 +0100 Subject: [PATCH 641/867] net: rcar_gen4_ptp: Remove unused defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver specific flags to control packet time stamps have all been replaced by 
values from enum hwtstamp_tx_types and enum hwtstamp_rx_filters. Remove the driver specific flags as there are no more users. Signed-off-by: Niklas Söderlund Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251104222420.882731-6-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rcar_gen4_ptp.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h index 1a1e43add129b..9a9c232c854ef 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h @@ -9,15 +9,6 @@ #include -/* driver's definitions */ -#define RCAR_GEN4_RXTSTAMP_ENABLED BIT(0) -#define RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT BIT(1) -#define RCAR_GEN4_RXTSTAMP_TYPE_ALL (RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT | BIT(2)) -#define RCAR_GEN4_RXTSTAMP_TYPE RCAR_GEN4_RXTSTAMP_TYPE_ALL - -#define RCAR_GEN4_TXTSTAMP_ENABLED BIT(0) - - struct rcar_gen4_ptp_private { void __iomem *addr; struct ptp_clock *clock; From 5ce97b8d6132a0f252c68d58fed35f73c31e0bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:19 +0100 Subject: [PATCH 642/867] net: ravb: Break out Rx hardware timestamping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for moving away from device specific bit-fields to track how to do hardware Rx timestamping to using net common enums by breaking out the timestamping to a helper function. This is done to create cleaner code and prepare for easier changes improving the hardware timestamping. There is no functional change. 
Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20251104222420.882731-7-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 40 +++++++++++++++--------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index cc619dbebf9d8..5477bb5c69ae6 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -946,6 +946,29 @@ static int ravb_rx_gbeth(struct net_device *ndev, int budget, int q) return rx_packets; } +static void ravb_rx_rcar_hwstamp(struct ravb_private *priv, int q, + struct ravb_ex_rx_desc *desc, + struct sk_buff *skb) +{ + u32 get_ts = priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE; + struct skb_shared_hwtstamps *shhwtstamps; + struct timespec64 ts; + + get_ts &= (q == RAVB_NC) ? + RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : + ~RAVB_RXTSTAMP_TYPE_V2_L2_EVENT; + + if (!get_ts) + return; + + shhwtstamps = skb_hwtstamps(skb); + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + ts.tv_sec = ((u64)le16_to_cpu(desc->ts_sh) << 32) + | le32_to_cpu(desc->ts_sl); + ts.tv_nsec = le32_to_cpu(desc->ts_n); + shhwtstamps->hwtstamp = timespec64_to_ktime(ts); +} + /* Packet receive function for Ethernet AVB */ static int ravb_rx_rcar(struct net_device *ndev, int budget, int q) { @@ -955,7 +978,6 @@ static int ravb_rx_rcar(struct net_device *ndev, int budget, int q) struct ravb_ex_rx_desc *desc; unsigned int limit, i; struct sk_buff *skb; - struct timespec64 ts; int rx_packets = 0; u8 desc_status; u16 pkt_len; @@ -992,7 +1014,6 @@ static int ravb_rx_rcar(struct net_device *ndev, int budget, int q) if (desc_status & MSC_CEEF) stats->rx_missed_errors++; } else { - u32 get_ts = priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE; struct ravb_rx_buffer *rx_buff; void *rx_addr; @@ -1010,19 +1031,8 @@ static int ravb_rx_rcar(struct net_device *ndev, int budget, int q) break; } 
skb_mark_for_recycle(skb); - get_ts &= (q == RAVB_NC) ? - RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : - ~RAVB_RXTSTAMP_TYPE_V2_L2_EVENT; - if (get_ts) { - struct skb_shared_hwtstamps *shhwtstamps; - - shhwtstamps = skb_hwtstamps(skb); - memset(shhwtstamps, 0, sizeof(*shhwtstamps)); - ts.tv_sec = ((u64) le16_to_cpu(desc->ts_sh) << - 32) | le32_to_cpu(desc->ts_sl); - ts.tv_nsec = le32_to_cpu(desc->ts_n); - shhwtstamps->hwtstamp = timespec64_to_ktime(ts); - } + + ravb_rx_rcar_hwstamp(priv, q, desc, skb); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); From 16e2e6cf75e62182b9e28d0e48ab72d93c464bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 4 Nov 2025 23:24:20 +0100 Subject: [PATCH 643/867] net: ravb: Use common defines for time stamping control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of translating to/from driver specific flags for packet time stamp control use the common flags directly. This simplifies the driver as the translating code can be removed while at the same time making it clear the flags are not flags written to hardware registers. The change from a device specific bit-field track variable to the common enum datatypes forces us to touch the ravb_rx_rcar_hwstamp() in a non trivial way. To make this cleaner and easier to understand expand the nested conditions. 
Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20251104222420.882731-8-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb.h | 14 ++------- drivers/net/ethernet/renesas/ravb_main.c | 37 ++++++++---------------- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index d65cd83ddd163..5e56ec9b1013a 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -35,16 +35,6 @@ /* Driver's parameters */ #define RAVB_ALIGN 128 -/* Hardware time stamp */ -#define RAVB_TXTSTAMP_VALID 0x00000001 /* TX timestamp valid */ -#define RAVB_TXTSTAMP_ENABLED 0x00000010 /* Enable TX timestamping */ - -#define RAVB_RXTSTAMP_VALID 0x00000001 /* RX timestamp valid */ -#define RAVB_RXTSTAMP_TYPE 0x00000006 /* RX type mask */ -#define RAVB_RXTSTAMP_TYPE_V2_L2_EVENT 0x00000002 -#define RAVB_RXTSTAMP_TYPE_ALL 0x00000006 -#define RAVB_RXTSTAMP_ENABLED 0x00000010 /* Enable RX timestamping */ - enum ravb_reg { /* AVB-DMAC registers */ CCC = 0x0000, @@ -1114,8 +1104,8 @@ struct ravb_private { u32 rx_over_errors; u32 rx_fifo_errors; struct net_device_stats stats[NUM_RX_QUEUE]; - u32 tstamp_tx_ctrl; - u32 tstamp_rx_ctrl; + enum hwtstamp_tx_types tstamp_tx_ctrl; + enum hwtstamp_rx_filters tstamp_rx_ctrl; struct list_head ts_skb_list; u32 ts_skb_tag; struct ravb_ptp ptp; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 5477bb5c69ae6..1680e94b92425 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -950,13 +950,14 @@ static void ravb_rx_rcar_hwstamp(struct ravb_private *priv, int q, struct ravb_ex_rx_desc *desc, struct sk_buff *skb) { - u32 get_ts = priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE; struct skb_shared_hwtstamps *shhwtstamps; struct timespec64 ts; + bool get_ts; - get_ts &= (q == RAVB_NC) ? 
- RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : - ~RAVB_RXTSTAMP_TYPE_V2_L2_EVENT; + if (q == RAVB_NC) + get_ts = priv->tstamp_rx_ctrl == HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + else + get_ts = priv->tstamp_rx_ctrl != HWTSTAMP_FILTER_PTP_V2_L2_EVENT; if (!get_ts) return; @@ -2424,18 +2425,8 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ravb_private *priv = netdev_priv(ndev); config->flags = 0; - config->tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : - HWTSTAMP_TX_OFF; - switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) { - case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT: - config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - break; - case RAVB_RXTSTAMP_TYPE_ALL: - config->rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - config->rx_filter = HWTSTAMP_FILTER_NONE; - } + config->tx_type = priv->tstamp_tx_ctrl; + config->rx_filter = priv->tstamp_rx_ctrl; return 0; } @@ -2446,15 +2437,13 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct netlink_ext_ack *extack) { struct ravb_private *priv = netdev_priv(ndev); - u32 tstamp_rx_ctrl = RAVB_RXTSTAMP_ENABLED; - u32 tstamp_tx_ctrl; + enum hwtstamp_rx_filters tstamp_rx_ctrl; + enum hwtstamp_tx_types tstamp_tx_ctrl; switch (config->tx_type) { case HWTSTAMP_TX_OFF: - tstamp_tx_ctrl = 0; - break; case HWTSTAMP_TX_ON: - tstamp_tx_ctrl = RAVB_TXTSTAMP_ENABLED; + tstamp_tx_ctrl = config->tx_type; break; default: return -ERANGE; @@ -2462,14 +2451,12 @@ static int ravb_hwtstamp_set(struct net_device *ndev, switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: - tstamp_rx_ctrl = 0; - break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - tstamp_rx_ctrl |= RAVB_RXTSTAMP_TYPE_V2_L2_EVENT; + tstamp_rx_ctrl = config->rx_filter; break; default: config->rx_filter = HWTSTAMP_FILTER_ALL; - tstamp_rx_ctrl |= RAVB_RXTSTAMP_TYPE_ALL; + tstamp_rx_ctrl = HWTSTAMP_FILTER_ALL; } priv->tstamp_tx_ctrl = tstamp_tx_ctrl; From c6934c4e049c8a7f2b7fab620c04bc5dfbd947c1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Nov 2025 15:23:44 -0800 
Subject: [PATCH 644/867] netlink: specs: netdev add missing stats to qstat-get Add missing entries in C attribute list. Link: https://patch.msgid.link/20251104232348.1954349-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 10c412b7433f7..82bf5cb2617d1 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -733,6 +733,29 @@ operations: - rx-bytes - tx-packets - tx-bytes + - rx-alloc-fail + - rx-hw-drops + - rx-hw-drop-overruns + - rx-csum-complete + - rx-csum-unnecessary + - rx-csum-none + - rx-csum-bad + - rx-hw-gro-packets + - rx-hw-gro-bytes + - rx-hw-gro-wire-packets + - rx-hw-gro-wire-bytes + - rx-hw-drop-ratelimits + - tx-hw-drops + - tx-hw-drop-errors + - tx-csum-none + - tx-needs-csum + - tx-hw-gso-packets + - tx-hw-gso-bytes + - tx-hw-gso-wire-packets + - tx-hw-gso-wire-bytes + - tx-hw-drop-ratelimits + - tx-stop + - tx-wake - name: bind-rx doc: Bind dmabuf to netdev From be88c549e9d78828a2e06126ed7e17fc2e030f1f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:40 +0000 Subject: [PATCH 645/867] tcp: Call tcp_syn_ack_timeout() directly. Since DCCP has been removed, we do not need to use request_sock_ops.syn_ack_timeout(). Let's call tcp_syn_ack_timeout() directly. Now other function pointers of request_sock_ops are protocol-dependent. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/request_sock.h | 1 - net/ipv4/inet_connection_sock.c | 4 +++- net/ipv4/tcp_ipv4.c | 1 - net/ipv4/tcp_timer.c | 3 +-- net/ipv6/tcp_ipv6.c | 1 - 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cd4d4cf71d0d2..9b9e04f6bb893 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -36,7 +36,6 @@ struct request_sock_ops { struct sk_buff *skb, enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); - void (*syn_ack_timeout)(const struct request_sock *req); }; struct saved_syn { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3b83b66b2284c..6a86c1ac30112 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1096,9 +1096,11 @@ static void reqsk_timer_handler(struct timer_list *t) young <<= 1; } } + syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept), &expire, &resend); - req->rsk_ops->syn_ack_timeout(req); + tcp_syn_ack_timeout(req); + if (!expire && (!resend || !tcp_rtx_synack(sk_listener, req) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b7526a7888cbe..0cfebac33a910 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1660,7 +1660,6 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2dd73a4e8e517..0672c3d8f4f10 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -458,7 +458,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) struct 
tcp_sock *tp = tcp_sk(sk); int max_retries; - req->rsk_ops->syn_ack_timeout(req); + tcp_syn_ack_timeout(req); /* Add one more retry for fastopen. * Paired with WRITE_ONCE() in tcp_sock_set_syncnt() @@ -752,7 +752,6 @@ void tcp_syn_ack_timeout(const struct request_sock *req) __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } -EXPORT_IPV6_MOD(tcp_syn_ack_timeout); void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7df21c1cba213..08113f4301249 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -796,7 +796,6 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { From 3ce5dd8161ecdf12ffe0af99ff8980f1432f64a5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:41 +0000 Subject: [PATCH 646/867] tcp: Remove timeout arg from reqsk_queue_hash_req(). inet_csk_reqsk_queue_hash_add() is no longer shared by DCCP. We do not need to pass req->timeout down to reqsk_queue_hash_req(). Let's move tcp_timeout_init() from tcp_conn_request() to reqsk_queue_hash_req(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 3 +-- net/ipv4/inet_connection_sock.c | 11 +++++------ net/ipv4/tcp_input.c | 14 +++++--------- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b4b8866476075..90a99a2fc8047 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -267,8 +267,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - unsigned long timeout); +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, bool own_req); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6a86c1ac30112..d9c674403eb03 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1144,8 +1144,7 @@ static void reqsk_timer_handler(struct timer_list *t) reqsk_put(oreq); } -static bool reqsk_queue_hash_req(struct request_sock *req, - unsigned long timeout) +static bool reqsk_queue_hash_req(struct request_sock *req) { bool found_dup_sk = false; @@ -1153,8 +1152,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req, return false; /* The timer needs to be setup after a successful insertion. 
*/ + req->timeout = tcp_timeout_init((struct sock *)req); timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); - mod_timer(&req->rsk_timer, jiffies + timeout); + mod_timer(&req->rsk_timer, jiffies + req->timeout); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. @@ -1164,10 +1164,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req, return true; } -bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - unsigned long timeout) +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req) { - if (!reqsk_queue_hash_req(req, timeout)) + if (!reqsk_queue_hash_req(req)) return false; inet_csk_reqsk_queue_added(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6db1d4c36a88b..804ec56bdd244 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7531,15 +7531,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, sock_put(fastopen_sk); } else { tcp_rsk(req)->tfo_listener = false; - if (!want_cookie) { - req->timeout = tcp_timeout_init((struct sock *)req); - if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, - req->timeout))) { - reqsk_free(req); - dst_release(dst); - return 0; - } - + if (!want_cookie && + unlikely(!inet_csk_reqsk_queue_hash_add(sk, req))) { + reqsk_free(req); + dst_release(dst); + return 0; } af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : From 6fbf648d5cc48ebf250f32a8793a95a2596c0d50 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:42 +0000 Subject: [PATCH 647/867] tcp: Remove redundant init for req->timeout. Commit 5903123f662e ("tcp: Use BPF timeout setting for SYN ACK RTO") introduced req->timeout and initialised it in 3 places: 1. reqsk_alloc() sets 0 2. inet_reqsk_alloc() sets TCP_TIMEOUT_INIT 3. tcp_conn_request() sets tcp_timeout_init() 1. has always been redundant as 2. overwrites it immediately. 2.
was necessary for TFO SYN+ACK but is no longer needed after commit 8ea731d4c2ce ("tcp: Make SYN ACK RTO tunable by BPF programs with TFO"). 3. was moved to reqsk_queue_hash_req() in the previous patch. Now, we always initialise req->timeout just before scheduling the SYN+ACK timer: * For non-TFO SYN+ACK : reqsk_queue_hash_req() * For TFO SYN+ACK : tcp_fastopen_create_child() Let's remove the redundant initialisation of req->timeout in reqsk_alloc() and inet_reqsk_alloc(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d9c674403eb03..2bfe7af51bbb1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -885,7 +885,6 @@ reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; req->syncookie = 0; - req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; req->sk = NULL; @@ -913,7 +912,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); ireq->ireq_family = sk_listener->sk_family; - req->timeout = TCP_TIMEOUT_INIT; } return req; From 207ce0f6bc131812c96cf4f6db328af5396cebac Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:43 +0000 Subject: [PATCH 648/867] tcp: Remove timeout arg from reqsk_timeout(). reqsk_timeout() is always called with @timeout being TCP_RTO_MAX. Let's remove the arg. As a prep for the next patch, reqsk_timeout() is moved to tcp.h and renamed to tcp_reqsk_timeout(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 8 -------- include/net/tcp.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp_minisocks.c | 5 +++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 90a99a2fc8047..fd40af2221b99 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -290,14 +290,6 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); -static inline unsigned long -reqsk_timeout(struct request_sock *req, unsigned long max_timeout) -{ - u64 timeout = (u64)req->timeout << req->num_timeout; - - return (unsigned long)min_t(u64, timeout, max_timeout); -} - void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_for_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 0aa1f07d036a6..0c7274ac7ed5b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -841,6 +841,13 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline unsigned long tcp_reqsk_timeout(struct request_sock *req) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, TCP_RTO_MAX); +} + u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2bfe7af51bbb1..b4eae731c9ba5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1105,7 +1105,7 @@ static void 
reqsk_timer_handler(struct timer_list *t) inet_rsk(req)->acked)) { if (req->num_timeout++ == 0) atomic_dec(&queue->young); - mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX)); + mod_timer(&req->rsk_timer, jiffies + tcp_reqsk_timeout(req)); if (!nreq) return; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ded2cf1f60067..d8f4d813e8dd2 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -714,7 +714,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ; + tmp_opt.ts_recent_stamp = ktime_get_seconds() - + tcp_reqsk_timeout(req) / HZ; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -753,7 +754,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, !tcp_rtx_synack(sk, req)) { unsigned long expires = jiffies; - expires += reqsk_timeout(req, TCP_RTO_MAX); + expires += tcp_reqsk_timeout(req); if (!fastopen) mod_timer_pending(&req->rsk_timer, expires); else From 1e9d3005e02cba82047d49f859982fc73b9a100b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:44 +0000 Subject: [PATCH 649/867] tcp: Apply max RTO to non-TFO SYN+ACK. Since commit 54a378f43425 ("tcp: add the ability to control max RTO"), TFO SYN+ACK RTO is capped by the TFO full sk's inet_csk(sk)->icsk_rto_max. The value is inherited from the parent listener. Let's apply the same cap to non-TFO SYN+ACK. Note that req->rsk_listener is always non-NULL when we call tcp_reqsk_timeout() in reqsk_timer_handler() or tcp_check_req(). It could be NULL for SYN cookie req, but we do not use req->timeout then. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c7274ac7ed5b..4833ec7903eca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -845,7 +845,8 @@ static inline unsigned long tcp_reqsk_timeout(struct request_sock *req) { u64 timeout = (u64)req->timeout << req->num_timeout; - return (unsigned long)min_t(u64, timeout, TCP_RTO_MAX); + return (unsigned long)min_t(u64, timeout, + tcp_rto_max(req->rsk_listener)); } u32 tcp_delack_max(const struct sock *sk); From ffc56c90819e86d3a8c4eff6f831317d1c1476b6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:45 +0000 Subject: [PATCH 650/867] selftest: packetdrill: Add max RTO test for SYN+ACK. This script sets net.ipv4.tcp_rto_max_ms to 1000 and checks if SYN+ACK RTO is capped at 1s for TFO and non-TFO. Without the previous patch, the max RTO is applied to TFO SYN+ACK only, and non-TFO SYN+ACK RTO increases exponentially. # selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt # TAP version 13 # 1..2 # tcp_rto_synack_rto_max.pkt:46: error handling packet: timing error: expected outbound packet at 5.091936 sec but happened at 6.107826 sec; tolerance 0.127974 sec # script packet: 5.091936 S. 0:0(0) ack 1 # actual packet: 6.107826 S. 0:0(0) ack 1 win 65535 # not ok 1 ipv4 # tcp_rto_synack_rto_max.pkt:46: error handling packet: timing error: expected outbound packet at 5.075901 sec but happened at 6.091841 sec; tolerance 0.127976 sec # script packet: 5.075901 S. 0:0(0) ack 1 # actual packet: 6.091841 S. 0:0(0) ack 1 win 65535 # not ok 2 ipv6 # # Totals: pass:0 fail:2 xfail:0 xpass:0 skip:0 error:0 not ok 49 selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt # exit=1 With the previous patch, all SYN+ACKs are retransmitted after 1s. 
# selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt # TAP version 13 # 1..2 # ok 1 ipv4 # ok 2 ipv6 # # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 ok 49 selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .../packetdrill/tcp_rto_synack_rto_max.pkt | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt new file mode 100644 index 0000000000000..47550df124cec --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test SYN+ACK RTX with 1s RTO. +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000` + +// +// Test 1: TFO SYN+ACK +// + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 1000 + +0 > S. 0:0(0) ack 11 + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 11 + +1 > S. 0:0(0) ack 11 + +1 > S. 0:0(0) ack 11 + + +0 < . 11:11(0) ack 1 win 1000 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 + + +// +// Test 2: non-TFO SYN+ACK +// + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 1000 + +0 > S. 0:0(0) ack 1 + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 1 + +1 > S. 0:0(0) ack 1 + +1 > S. 0:0(0) ack 1 + + +0 < . 
1:1(0) ack 1 win 1000 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 From 4da4e4bde1c453ac5cc2dce5def81d504ae257ee Mon Sep 17 00:00:00 2001 From: Nate Karstens Date: Thu, 6 Nov 2025 16:28:33 -0600 Subject: [PATCH 651/867] strparser: Fix signed/unsigned mismatch bug The `len` member of the sk_buff is an unsigned int. This is cast to `ssize_t` (a signed type) for the first sk_buff in the comparison, but not the second sk_buff. On 32-bit systems, this can result in an integer underflow for certain values because unsigned arithmetic is being used. This appears to be an oversight: if the intention was to use unsigned arithmetic, then the first cast would have been omitted. The change ensures both len values are cast to `ssize_t`. The underflow causes an issue with ktls when multiple TLS PDUs are included in a single TCP segment. The mainline kernel does not use strparser for ktls anymore, but this is still useful for other features that still use strparser, and for backporting. 
Signed-off-by: Nate Karstens Cc: stable@vger.kernel.org Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Reviewed-by: Jacob Keller Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251106222835.1871628-1-nate.karstens@garmin.com Signed-off-by: Jakub Kicinski --- net/strparser/strparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 43b1f558b33db..e659fea2da70f 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -238,7 +238,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, strp_parser_err(strp, -EMSGSIZE, desc); break; } else if (len <= (ssize_t)head->len - - skb->len - stm->strp.offset) { + (ssize_t)skb->len - stm->strp.offset) { /* Length must be into new skb (and also * greater than zero) */ From 416dd649f3aa3774907c668167a29c668dbc634b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 11:52:36 +0000 Subject: [PATCH 652/867] tcp: add net.ipv4.tcp_comp_sack_rtt_percent TCP SACK compression has been added in 2018 in commit 5d9f4262b7ea ("tcp: add SACK compression"). It is working great for WAN flows (with large RTT). Wifi in particular gets a significant boost _when_ ACK are suppressed. Add a new sysctl so that we can tune the very conservative 5 % value that has been used so far in this formula, so that small RTT flows can benefit from this feature. delay = min ( 5 % of RTT, 1 ms) This patch adds new tcp_comp_sack_rtt_percent sysctl to ease experiments and tuning. Given that we cap the delay to 1ms (tcp_comp_sack_delay_ns sysctl), set the default value to 33 %. 
Quoting Neal Cardwell ( https://lore.kernel.org/netdev/CADVnQymZ1tFnEA1Q=vtECs0=Db7zHQ8=+WCQtnhHFVbEOzjVnQ@mail.gmail.com/ ) The rationale for 33% is basically to try to facilitate pipelining, where there are always at least 3 ACKs and 3 GSO/TSO skbs per SRTT, so that the path can maintain a budget for 3 full-sized GSO/TSO skbs "in flight" at all times: + 1 skb in the qdisc waiting to be sent by the NIC next + 1 skb being sent by the NIC (being serialized by the NIC out onto the wire) + 1 skb being received and aggregated by the receiver machine's aggregation mechanism (some combination of LRO, GRO, and sack compression) Note that this is basically the same magic number (3) and the same rationales as: (a) tcp_tso_should_defer() ensuring that we defer sending data for no longer than cwnd/tcp_tso_win_divisor (where tcp_tso_win_divisor = 3), and (b) bbr_quantization_budget() ensuring that cwnd is at least 3 GSO/TSO skbs to maintain pipelining and full throughput at low RTTs Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251106115236.3450026-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 13 +++++++++++-- include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ net/ipv4/tcp_input.c | 26 ++++++++++++++++++-------- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 40 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 7cd35bfd39e68..2bae61be18593 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -854,9 +854,18 @@ tcp_sack - BOOLEAN Default: 1 (enabled) +tcp_comp_sack_rtt_percent - INTEGER + Percentage of SRTT used for the compressed SACK feature. + See tcp_comp_sack_nr, tcp_comp_sack_delay_ns, tcp_comp_sack_slack_ns. 
+ + Possible values : 1 - 1000 + + Default : 33 % + tcp_comp_sack_delay_ns - LONG INTEGER - TCP tries to reduce number of SACK sent, using a timer - based on 5% of SRTT, capped by this sysctl, in nano seconds. + TCP tries to reduce number of SACK sent, using a timer based + on tcp_comp_sack_rtt_percent of SRTT, capped by this sysctl + in nano seconds. The default is 1ms, based on TSO autosizing period. Default : 1,000,000 ns (1 ms) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0e96c90e56c6d..de9d36acc8e22 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -221,6 +221,7 @@ struct netns_ipv4 { int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; unsigned int sysctl_tcp_child_ehash_entries; + int sysctl_tcp_comp_sack_rtt_percent; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; int sysctl_max_syn_backlog; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0c7c8f9041cbf..35367f8e2da32 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1451,6 +1451,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "tcp_comp_sack_rtt_percent", + .data = &init_net.ipv4.sysctl_tcp_comp_sack_rtt_percent, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE_THOUSAND, + }, { .procname = "tcp_comp_sack_slack_ns", .data = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 804ec56bdd244..9df5d75156057 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5893,7 +5893,9 @@ static inline void tcp_data_snd_check(struct sock *sk) static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) { struct tcp_sock *tp = tcp_sk(sk); - unsigned long rtt, delay; + struct net *net = sock_net(sk); + unsigned long rtt; + u64 delay; /* More than 
one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && @@ -5912,7 +5914,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) * Defer the ack until tcp_release_cb(). */ if (sock_owned_by_user_nocheck(sk) && - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) { + READ_ONCE(net->ipv4.sysctl_tcp_backlog_ack_defer)) { set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags); return; } @@ -5927,7 +5929,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } if (!tcp_is_sack(tp) || - tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) + tp->compressed_ack >= READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_nr)) goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { @@ -5942,18 +5944,26 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) if (hrtimer_is_queued(&tp->compressed_ack_timer)) return; - /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */ + /* compress ack timer : comp_sack_rtt_percent of rtt, + * but no more than tcp_comp_sack_delay_ns. 
+ */ rtt = tp->rcv_rtt_est.rtt_us; if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; - delay = min_t(unsigned long, - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), - rtt * (NSEC_PER_USEC >> 3)/20); + /* delay = (rtt >> 3) * NSEC_PER_USEC * comp_sack_rtt_percent / 100 + * -> + * delay = rtt * 1.25 * comp_sack_rtt_percent + */ + delay = (u64)(rtt + (rtt >> 2)) * + READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_rtt_percent); + + delay = min(delay, READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_delay_ns)); + sock_hold(sk); hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), + READ_ONCE(net->ipv4.sysctl_tcp_comp_sack_slack_ns), HRTIMER_MODE_REL_PINNED_SOFT); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0cfebac33a910..a7d9fec2950b9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3595,6 +3595,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; + net->ipv4.sysctl_tcp_comp_sack_rtt_percent = 33; net->ipv4.sysctl_tcp_backlog_ack_defer = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; From 57531b3416448d1ced36a2a974a4085ec43d57b0 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 6 Nov 2025 17:12:09 +0100 Subject: [PATCH 653/867] selftests: net: local_termination: Wait for interfaces to come up It seems that most of the tests prepare the interfaces once before the test run (setup_prepare()), rely on setup_wait() to wait for link and only then run the test(s). local_termination brings the physical interfaces down and up during test run but never wait for them to come up. If the auto-negotiation takes some seconds, first test packets are being lost, which leads to false-negative test results. 
Use setup_wait() in run_test() to make sure auto-negotiation has been completed after all simple_if_init() calls on physical interfaces and test packets will not be lost because of the race against link establishment. Fixes: 90b9566aa5cd3f ("selftests: forwarding: add a test for local_termination.sh") Reviewed-by: Vladimir Oltean Signed-off-by: Alexander Sverdlin Link: https://patch.msgid.link/20251106161213.459501-1-alexander.sverdlin@siemens.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/local_termination.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index ecd34f364125c..892895659c7e4 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -176,6 +176,8 @@ run_test() local rcv_dmac=$(mac_get $rcv_if_name) local should_receive + setup_wait + tcpdump_start $rcv_if_name mc_route_prepare $send_if_name From 3f47e67dff1f7266e112c50313d63824f6f17102 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 6 Nov 2025 13:53:23 +0100 Subject: [PATCH 654/867] net: airoha: Add the capability to consume out-of-order DMA tx descriptors EN7581 and AN7583 SoCs are capable of DMA mapping non-linear tx skbs on non-consecutive DMA descriptors. This feature is useful when multiple flows are queued on the same hw tx queue since it allows the available tx DMA descriptors to be fully utilized and avoids the starvation of high-priority flows that the current codebase suffers from due to head-of-line blocking introduced by low-priority flows.
Tested-by: Xuegang Lu Reviewed-by: Jacob Keller Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251106-airoha-tx-linked-list-v2-1-0706d4a322bd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 85 +++++++++++------------- drivers/net/ethernet/airoha/airoha_eth.h | 7 +- 2 files changed, 45 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 688faf999e4c0..75893c90a0a17 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -892,19 +892,13 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) dma_unmap_single(eth->dev, e->dma_addr, e->dma_len, DMA_TO_DEVICE); - memset(e, 0, sizeof(*e)); + e->dma_addr = 0; + list_add_tail(&e->list, &q->tx_list); + WRITE_ONCE(desc->msg0, 0); WRITE_ONCE(desc->msg1, 0); q->queued--; - /* completion ring can report out-of-order indexes if hw QoS - * is enabled and packets with different priority are queued - * to same DMA ring. 
Take into account possible out-of-order - * reports incrementing DMA ring tail pointer - */ - while (q->tail != q->head && !q->entry[q->tail].dma_addr) - q->tail = (q->tail + 1) % q->ndesc; - if (skb) { u16 queue = skb_get_queue_mapping(skb); struct netdev_queue *txq; @@ -949,6 +943,7 @@ static int airoha_qdma_init_tx_queue(struct airoha_queue *q, q->ndesc = size; q->qdma = qdma; q->free_thr = 1 + MAX_SKB_FRAGS; + INIT_LIST_HEAD(&q->tx_list); q->entry = devm_kzalloc(eth->dev, q->ndesc * sizeof(*q->entry), GFP_KERNEL); @@ -961,9 +956,9 @@ static int airoha_qdma_init_tx_queue(struct airoha_queue *q, return -ENOMEM; for (i = 0; i < q->ndesc; i++) { - u32 val; + u32 val = FIELD_PREP(QDMA_DESC_DONE_MASK, 1); - val = FIELD_PREP(QDMA_DESC_DONE_MASK, 1); + list_add_tail(&q->entry[i].list, &q->tx_list); WRITE_ONCE(q->desc[i].ctrl, cpu_to_le32(val)); } @@ -973,9 +968,9 @@ static int airoha_qdma_init_tx_queue(struct airoha_queue *q, airoha_qdma_wr(qdma, REG_TX_RING_BASE(qid), dma_addr); airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, - FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head)); + FIELD_PREP(TX_RING_CPU_IDX_MASK, 0)); airoha_qdma_rmw(qdma, REG_TX_DMA_IDX(qid), TX_RING_DMA_IDX_MASK, - FIELD_PREP(TX_RING_DMA_IDX_MASK, q->head)); + FIELD_PREP(TX_RING_DMA_IDX_MASK, 0)); return 0; } @@ -1031,17 +1026,21 @@ static int airoha_qdma_init_tx(struct airoha_qdma *qdma) static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) { struct airoha_eth *eth = q->qdma->eth; + int i; spin_lock_bh(&q->lock); - while (q->queued) { - struct airoha_queue_entry *e = &q->entry[q->tail]; + for (i = 0; i < q->ndesc; i++) { + struct airoha_queue_entry *e = &q->entry[i]; + + if (!e->dma_addr) + continue; dma_unmap_single(eth->dev, e->dma_addr, e->dma_len, DMA_TO_DEVICE); dev_kfree_skb_any(e->skb); + e->dma_addr = 0; e->skb = NULL; - - q->tail = (q->tail + 1) % q->ndesc; + list_add_tail(&e->list, &q->tx_list); q->queued--; } spin_unlock_bh(&q->lock); @@ -1883,20 +1882,6 @@ static u32 
airoha_get_dsa_tag(struct sk_buff *skb, struct net_device *dev) #endif } -static bool airoha_dev_tx_queue_busy(struct airoha_queue *q, u32 nr_frags) -{ - u32 tail = q->tail <= q->head ? q->tail + q->ndesc : q->tail; - u32 index = q->head + nr_frags; - - /* completion napi can free out-of-order tx descriptors if hw QoS is - * enabled and packets with different priorities are queued to the same - * DMA ring. Take into account possible out-of-order reports checking - * if the tx queue is full using circular buffer head/tail pointers - * instead of the number of queued packets. - */ - return index >= tail; -} - static int airoha_get_fe_port(struct airoha_gdm_port *port) { struct airoha_qdma *qdma = port->qdma; @@ -1919,8 +1904,10 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_qdma *qdma = port->qdma; u32 nr_frags, tag, msg0, msg1, len; + struct airoha_queue_entry *e; struct netdev_queue *txq; struct airoha_queue *q; + LIST_HEAD(tx_list); void *data; int i, qid; u16 index; @@ -1966,7 +1953,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, txq = netdev_get_tx_queue(dev, qid); nr_frags = 1 + skb_shinfo(skb)->nr_frags; - if (airoha_dev_tx_queue_busy(q, nr_frags)) { + if (q->queued + nr_frags >= q->ndesc) { /* not enough space in the queue */ netif_tx_stop_queue(txq); spin_unlock_bh(&q->lock); @@ -1975,11 +1962,13 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, len = skb_headlen(skb); data = skb->data; - index = q->head; + + e = list_first_entry(&q->tx_list, struct airoha_queue_entry, + list); + index = e - q->entry; for (i = 0; i < nr_frags; i++) { struct airoha_qdma_desc *desc = &q->desc[index]; - struct airoha_queue_entry *e = &q->entry[index]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t addr; u32 val; @@ -1989,7 +1978,14 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, if (unlikely(dma_mapping_error(dev->dev.parent, addr))) goto error_unmap; - index = 
(index + 1) % q->ndesc; + list_move_tail(&e->list, &tx_list); + e->skb = i ? NULL : skb; + e->dma_addr = addr; + e->dma_len = len; + + e = list_first_entry(&q->tx_list, struct airoha_queue_entry, + list); + index = e - q->entry; val = FIELD_PREP(QDMA_DESC_LEN_MASK, len); if (i < nr_frags - 1) @@ -2002,15 +1998,9 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, WRITE_ONCE(desc->msg1, cpu_to_le32(msg1)); WRITE_ONCE(desc->msg2, cpu_to_le32(0xffff)); - e->skb = i ? NULL : skb; - e->dma_addr = addr; - e->dma_len = len; - data = skb_frag_address(frag); len = skb_frag_size(frag); } - - q->head = index; q->queued += i; skb_tx_timestamp(skb); @@ -2019,7 +2009,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, if (netif_xmit_stopped(txq) || !netdev_xmit_more()) airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, - FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head)); + FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); if (q->ndesc - q->queued < q->free_thr) netif_tx_stop_queue(txq); @@ -2029,10 +2019,13 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, return NETDEV_TX_OK; error_unmap: - for (i--; i >= 0; i--) { - index = (q->head + i) % q->ndesc; - dma_unmap_single(dev->dev.parent, q->entry[index].dma_addr, - q->entry[index].dma_len, DMA_TO_DEVICE); + while (!list_empty(&tx_list)) { + e = list_first_entry(&tx_list, struct airoha_queue_entry, + list); + dma_unmap_single(dev->dev.parent, e->dma_addr, e->dma_len, + DMA_TO_DEVICE); + e->dma_addr = 0; + list_move_tail(&e->list, &q->tx_list); } spin_unlock_bh(&q->lock); diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index eb27a4ff51984..fbbc58133364b 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -169,7 +169,10 @@ enum trtcm_param { struct airoha_queue_entry { union { void *buf; - struct sk_buff *skb; + struct { + struct list_head list; + struct sk_buff *skb; + }; }; dma_addr_t dma_addr; u16 dma_len; 
@@ -193,6 +196,8 @@ struct airoha_queue { struct napi_struct napi; struct page_pool *page_pool; struct sk_buff *skb; + + struct list_head tx_list; }; struct airoha_tx_irq_queue { From 140039580efa96d18576a6c9fa6b1158be8a1d0f Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Wed, 5 Nov 2025 11:04:28 -0800 Subject: [PATCH 655/867] net: mana: Fix incorrect speed reported by debugfs Once the netshaper is created for MANA, the current bandwidth is reported in debugfs like this: $ sudo ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/net_shaper.yaml \ --do set \ --json '{"ifindex":'3', "handle":{ "scope": "netdev", "id":'1' }, "bw-max": 200000000 }' None $ sudo cat /sys/kernel/debug/mana/1/vport0/current_speed 200 After the shaper is deleted, it is expected to report the maximum speed supported by the SKU. But currently it is reporting 0, which is incorrect. Fix this inconsistency, by resetting apc->speed to apc->max_speed during deletion of the shaper object. This will improve readability and debuggability. 
Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Jacob Keller Link: https://patch.msgid.link/1762369468-32570-1-git-send-email-ernis@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 739087081dfde..cccd5b63cee69 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -854,7 +854,7 @@ static int mana_shaper_del(struct net_shaper_binding *binding, /* Reset mana port context parameters */ apc->handle.id = 0; apc->handle.scope = NET_SHAPER_SCOPE_UNSPEC; - apc->speed = 0; + apc->speed = apc->max_speed; } return err; From b87ee13e34931779ac1dcd3264beba50b54966fd Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Wed, 5 Nov 2025 10:42:12 +0530 Subject: [PATCH 656/867] net: phy: phy-c45: add OATC14 10BASE-T1S PHY cable diagnostic support Add support for Open Alliance TC14 (OATC14) 10BASE-T1S PHYs cable diagnostic feature. This patch implements: - genphy_c45_oatc14_cable_test_start() to initiate a cable test - genphy_c45_oatc14_cable_test_get_status() to retrieve test results - Helper function to map PHY cable test status to ethtool result codes - Function declarations and exports for use by PHY drivers This enables ethtool to report ok, open, short, and undetectable cable conditions on OATC14 10Base-T1S PHYs. 
Open Alliance TC14 10BASE-T1S Advanced Diagnostic PHY Features Specification ref: https://opensig.org/wp-content/uploads/2025/06/OPEN_Alliance_10BASE-T1S_Advanced_PHY_features_for-automotive_Ethernet_V2.1b.pdf Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20251105051213.50443-2-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio-open-alliance.h | 36 ++++++++ drivers/net/phy/phy-c45.c | 122 +++++++++++++++++++++++++++ include/linux/phy.h | 3 + 3 files changed, 161 insertions(+) diff --git a/drivers/net/phy/mdio-open-alliance.h b/drivers/net/phy/mdio-open-alliance.h index 931e14660d759..6850a3f0b31e2 100644 --- a/drivers/net/phy/mdio-open-alliance.h +++ b/drivers/net/phy/mdio-open-alliance.h @@ -43,4 +43,40 @@ /* Version Identifiers */ #define OATC14_IDM 0x0a00 +/* + * Open Alliance TC14 (10BASE-T1S) - Advanced Diagnostic Features Registers + * + * Refer to the OPEN Alliance documentation: + * https://opensig.org/automotive-ethernet-specifications/ + * + * Specification: + * "10BASE-T1S Advanced Diagnostic PHY Features" + * https://opensig.org/wp-content/uploads/2025/06/OPEN_Alliance_10BASE-T1S_Advanced_PHY_features_for-automotive_Ethernet_V2.1b.pdf + */ +/* Advanced Diagnostic Features Capability Register */ +#define MDIO_OATC14_ADFCAP 0xcc00 +#define OATC14_ADFCAP_HDD_CAPABILITY GENMASK(10, 8) + +/* Harness Defect Detection Register */ +#define MDIO_OATC14_HDD 0xcc01 +#define OATC14_HDD_CONTROL BIT(15) +#define OATC14_HDD_READY BIT(14) +#define OATC14_HDD_START_CONTROL BIT(13) +#define OATC14_HDD_VALID BIT(2) +#define OATC14_HDD_SHORT_OPEN_STATUS GENMASK(1, 0) + +/* Bus Short/Open Status: + * 0 0 - no fault; everything is ok. (Default) + * 0 1 - detected as an open or missing termination(s) + * 1 0 - detected as a short or extra termination(s) + * 1 1 - fault but fault type not detectable. More details can be available by + * vendor specific register if supported. 
+ */ +enum oatc14_hdd_status { + OATC14_HDD_STATUS_CABLE_OK = 0, + OATC14_HDD_STATUS_OPEN, + OATC14_HDD_STATUS_SHORT, + OATC14_HDD_STATUS_NOT_DETECTABLE, +}; + #endif /* __MDIO_OPEN_ALLIANCE__ */ diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 1a7b32be4625c..e8e5be4684ab9 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "mdio-open-alliance.h" #include "phylib-internal.h" @@ -1573,3 +1574,124 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, return ret; } EXPORT_SYMBOL(genphy_c45_ethtool_set_eee); + +/** + * oatc14_cable_test_get_result_code - Convert hardware cable test status to + * ethtool result code. + * @status: The hardware-reported cable test status + * + * This helper function maps the OATC14 HDD cable test status to the + * corresponding ethtool cable test result code. It provides a translation + * between the device-specific status values and the standardized ethtool + * result codes. + * + * Return: + * * ETHTOOL_A_CABLE_RESULT_CODE_OK - Cable is OK + * * ETHTOOL_A_CABLE_RESULT_CODE_OPEN - Open circuit detected + * * ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT - Short circuit detected + * * ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC - Status not detectable or invalid + */ +static int oatc14_cable_test_get_result_code(enum oatc14_hdd_status status) +{ + switch (status) { + case OATC14_HDD_STATUS_CABLE_OK: + return ETHTOOL_A_CABLE_RESULT_CODE_OK; + case OATC14_HDD_STATUS_OPEN: + return ETHTOOL_A_CABLE_RESULT_CODE_OPEN; + case OATC14_HDD_STATUS_SHORT: + return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT; + case OATC14_HDD_STATUS_NOT_DETECTABLE: + default: + return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC; + } +} + +/** + * genphy_c45_oatc14_cable_test_get_status - Get status of OATC14 10Base-T1S + * PHY cable test. 
+ * @phydev: pointer to the PHY device structure + * @finished: pointer to a boolean set true if the test is complete + * + * Retrieves the current status of the OATC14 10Base-T1S PHY cable test. + * This function reads the OATC14 HDD register to determine whether the test + * results are valid and whether the test has finished. + * + * If the test is complete, the function reports the cable test result via + * the ethtool cable test interface using ethnl_cable_test_result(), and then + * clears the test control bit in the PHY register to reset the test state. + * + * Return: 0 on success, or a negative error code on failure (e.g. register + * read/write error). + */ +int genphy_c45_oatc14_cable_test_get_status(struct phy_device *phydev, + bool *finished) +{ + int ret; + u8 sts; + + *finished = false; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_HDD); + if (ret < 0) + return ret; + + if (!(ret & OATC14_HDD_VALID)) + return 0; + + *finished = true; + + sts = FIELD_GET(OATC14_HDD_SHORT_OPEN_STATUS, ret); + + ret = ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A, + oatc14_cable_test_get_result_code(sts)); + if (ret) + return ret; + + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_HDD, OATC14_HDD_CONTROL); +} +EXPORT_SYMBOL(genphy_c45_oatc14_cable_test_get_status); + +/** + * genphy_c45_oatc14_cable_test_start - Start a cable test on an OATC14 + * 10Base-T1S PHY. + * @phydev: Pointer to the PHY device structure + * + * This function initiates a cable diagnostic test on a Clause 45 OATC14 + * 10Base-T1S capable PHY device. It first reads the PHY’s advanced diagnostic + * capability register to check if Harness Defect Detection (HDD) mode is + * supported. If the PHY does not report HDD capability, cable testing is not + * supported and the function returns -EOPNOTSUPP. + * + * For PHYs that support HDD, the function sets the appropriate control bits in + * the OATC14_HDD register to enable and start the cable diagnostic test. 
+ * + * Return: + * * 0 on success + * * -EOPNOTSUPP if the PHY does not support HDD capability + * * A negative error code on I/O or register access failures + */ +int genphy_c45_oatc14_cable_test_start(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_ADFCAP); + if (ret < 0) + return ret; + + if (!(ret & OATC14_ADFCAP_HDD_CAPABILITY)) + return -EOPNOTSUPP; + + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_HDD, + OATC14_HDD_CONTROL); + if (ret) + return ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_HDD); + if (ret < 0) + return ret; + + return phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_HDD, + OATC14_HDD_START_CONTROL); +} +EXPORT_SYMBOL(genphy_c45_oatc14_cable_test_start); diff --git a/include/linux/phy.h b/include/linux/phy.h index d145a200ea211..bf5457341ca80 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2251,6 +2251,9 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); +int genphy_c45_oatc14_cable_test_start(struct phy_device *phydev); +int genphy_c45_oatc14_cable_test_get_status(struct phy_device *phydev, + bool *finished); /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); From f424409483d26b3d54439cda832f273a54745cf6 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Wed, 5 Nov 2025 10:42:13 +0530 Subject: [PATCH 657/867] net: phy: microchip_t1s:: add cable diagnostic support for LAN867x Rev.D0 Enable Open Alliance TC14 (OATC14) 10Base-T1S cable diagnostic feature for Microchip LAN867x Rev.D0 PHY by implementing `cable_test_start` and `cable_test_get_status` using the generic C45 functions. This allows the `ethtool` utility to perform cable diagnostic tests directly on the PHY, improving network troubleshooting and maintenance. 
Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20251105051213.50443-3-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip_t1s.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/microchip_t1s.c b/drivers/net/phy/microchip_t1s.c index bce5cf087b194..5a0a667789770 100644 --- a/drivers/net/phy/microchip_t1s.c +++ b/drivers/net/phy/microchip_t1s.c @@ -573,6 +573,8 @@ static struct phy_driver microchip_t1s_driver[] = { .get_plca_cfg = genphy_c45_plca_get_cfg, .set_plca_cfg = lan86xx_plca_set_cfg, .get_plca_status = genphy_c45_plca_get_status, + .cable_test_start = genphy_c45_oatc14_cable_test_start, + .cable_test_get_status = genphy_c45_oatc14_cable_test_get_status, }, { PHY_ID_MATCH_EXACT(PHY_ID_LAN865X_REVB), From f73e0f46bbfab29b111ff52d047f15aa13623972 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 5 Nov 2025 23:09:17 +0100 Subject: [PATCH 658/867] net: phy: fixed_phy: shrink size of struct fixed_phy_status All three members are effectively of type bool, so make this explicit and shrink size of struct fixed_phy_status. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/9eca3d7e-fa64-4724-8fdc-f2c1a8f2ae8f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 2 +- include/linux/phy_fixed.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index d498d8a9bba6f..9bd6937411e43 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -224,7 +224,7 @@ EXPORT_SYMBOL_GPL(fixed_phy_register); struct phy_device *fixed_phy_register_100fd(void) { static const struct fixed_phy_status status = { - .link = 1, + .link = true, .speed = SPEED_100, .duplex = DUPLEX_FULL, }; diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 8bade999831c8..436bff20f324f 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -5,11 +5,11 @@ #include struct fixed_phy_status { - int link; int speed; int duplex; - int pause; - int asym_pause; + bool link:1; + bool pause:1; + bool asym_pause:1; }; struct device_node; From dae4a92399fa8d68aa917db6bb3245f83021e762 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 Nov 2025 16:26:02 -0800 Subject: [PATCH 659/867] psp: report basic stats from the core Track and report stats common to all psp devices from the core. A 'stale-event' is when the core marks the rx state of an active psp_assoc as incapable of authenticating psp encapsulated data. 
Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251106002608.1578518-2-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/psp.yaml | 40 +++++++++++++++ include/net/psp/types.h | 9 ++++ include/uapi/linux/psp.h | 10 ++++ net/psp/psp-nl-gen.c | 19 +++++++ net/psp/psp-nl-gen.h | 2 + net/psp/psp_nl.c | 74 ++++++++++++++++++++++++++++ net/psp/psp_sock.c | 4 +- 7 files changed, 157 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml index 944429e5c9a84..9141482213843 100644 --- a/Documentation/netlink/specs/psp.yaml +++ b/Documentation/netlink/specs/psp.yaml @@ -76,6 +76,28 @@ attribute-sets: name: spi doc: Security Parameters Index (SPI) of the association. type: u32 + - + name: stats + attributes: + - + name: dev-id + doc: PSP device ID. + type: u32 + checks: + min: 1 + - + name: key-rotations + type: uint + doc: | + Number of key rotations during the lifetime of the device. + Kernel statistic. + - + name: stale-events + type: uint + doc: | + Number of times a socket's Rx got shut down due to using + a key which went stale (fully rotated out). + Kernel statistic. operations: list: @@ -177,6 +199,24 @@ operations: pre: psp-assoc-device-get-locked post: psp-device-unlock + - + name: get-stats + doc: Get device statistics. 
+ attribute-set: stats + do: + request: + attributes: + - dev-id + reply: &stats-all + attributes: + - dev-id + - key-rotations + - stale-events + pre: psp-device-get-locked + post: psp-device-unlock + dump: + reply: *stats-all + mcast-groups: list: - diff --git a/include/net/psp/types.h b/include/net/psp/types.h index 31cee64b7c86a..5b0ccaac38825 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -59,6 +59,10 @@ struct psp_dev_config { * device key * @stale_assocs: associations which use a rotated out key * + * @stats: statistics maintained by the core + * @stats.rotations: See stats attr key-rotations + * @stats.stales: See stats attr stale-events + * * @rcu: RCU head for freeing the structure */ struct psp_dev { @@ -81,6 +85,11 @@ struct psp_dev { struct list_head prev_assocs; struct list_head stale_assocs; + struct { + unsigned long rotations; + unsigned long stales; + } stats; + struct rcu_head rcu; }; diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index 607c42c39ba55..31592760ad794 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -45,6 +45,15 @@ enum { PSP_A_KEYS_MAX = (__PSP_A_KEYS_MAX - 1) }; +enum { + PSP_A_STATS_DEV_ID = 1, + PSP_A_STATS_KEY_ROTATIONS, + PSP_A_STATS_STALE_EVENTS, + + __PSP_A_STATS_MAX, + PSP_A_STATS_MAX = (__PSP_A_STATS_MAX - 1) +}; + enum { PSP_CMD_DEV_GET = 1, PSP_CMD_DEV_ADD_NTF, @@ -55,6 +64,7 @@ enum { PSP_CMD_KEY_ROTATE_NTF, PSP_CMD_RX_ASSOC, PSP_CMD_TX_ASSOC, + PSP_CMD_GET_STATS, __PSP_CMD_MAX, PSP_CMD_MAX = (__PSP_CMD_MAX - 1) diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c index 9fdd6f831803e..73f8b06d66f0f 100644 --- a/net/psp/psp-nl-gen.c +++ b/net/psp/psp-nl-gen.c @@ -47,6 +47,11 @@ static const struct nla_policy psp_tx_assoc_nl_policy[PSP_A_ASSOC_SOCK_FD + 1] = [PSP_A_ASSOC_SOCK_FD] = { .type = NLA_U32, }, }; +/* PSP_CMD_GET_STATS - do */ +static const struct nla_policy psp_get_stats_nl_policy[PSP_A_STATS_DEV_ID + 1] = { + [PSP_A_STATS_DEV_ID] = 
NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for psp */ static const struct genl_split_ops psp_nl_ops[] = { { @@ -99,6 +104,20 @@ static const struct genl_split_ops psp_nl_ops[] = { .maxattr = PSP_A_ASSOC_SOCK_FD, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = PSP_CMD_GET_STATS, + .pre_doit = psp_device_get_locked, + .doit = psp_nl_get_stats_doit, + .post_doit = psp_device_unlock, + .policy = psp_get_stats_nl_policy, + .maxattr = PSP_A_STATS_DEV_ID, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = PSP_CMD_GET_STATS, + .dumpit = psp_nl_get_stats_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group psp_nl_mcgrps[] = { diff --git a/net/psp/psp-nl-gen.h b/net/psp/psp-nl-gen.h index 25268ed11fb56..5bc3b5d5a53e2 100644 --- a/net/psp/psp-nl-gen.h +++ b/net/psp/psp-nl-gen.h @@ -28,6 +28,8 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info); int psp_nl_key_rotate_doit(struct sk_buff *skb, struct genl_info *info); int psp_nl_rx_assoc_doit(struct sk_buff *skb, struct genl_info *info); int psp_nl_tx_assoc_doit(struct sk_buff *skb, struct genl_info *info); +int psp_nl_get_stats_doit(struct sk_buff *skb, struct genl_info *info); +int psp_nl_get_stats_dumpit(struct sk_buff *skb, struct netlink_callback *cb); enum { PSP_NLGRP_MGMT, diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c index 8aaca62744c3c..f990cccbe99c1 100644 --- a/net/psp/psp_nl.c +++ b/net/psp/psp_nl.c @@ -262,6 +262,7 @@ int psp_nl_key_rotate_doit(struct sk_buff *skb, struct genl_info *info) psd->generation & ~PSP_GEN_VALID_MASK); psp_assocs_key_rotated(psd); + psd->stats.rotations++; nlmsg_end(ntf, (struct nlmsghdr *)ntf->data); genlmsg_multicast_netns(&psp_nl_family, dev_net(psd->main_netdev), ntf, @@ -503,3 +504,76 @@ int psp_nl_tx_assoc_doit(struct sk_buff *skb, struct genl_info *info) nlmsg_free(rsp); return err; } + +static int +psp_nl_stats_fill(struct psp_dev *psd, struct sk_buff *rsp, + const struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_iput(rsp, 
info); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(rsp, PSP_A_STATS_DEV_ID, psd->id) || + nla_put_uint(rsp, PSP_A_STATS_KEY_ROTATIONS, + psd->stats.rotations) || + nla_put_uint(rsp, PSP_A_STATS_STALE_EVENTS, psd->stats.stales)) + goto err_cancel_msg; + + genlmsg_end(rsp, hdr); + return 0; + +err_cancel_msg: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +int psp_nl_get_stats_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct psp_dev *psd = info->user_ptr[0]; + struct sk_buff *rsp; + int err; + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + err = psp_nl_stats_fill(psd, rsp, info); + if (err) + goto err_free_msg; + + return genlmsg_reply(rsp, info); + +err_free_msg: + nlmsg_free(rsp); + return err; +} + +static int +psp_nl_stats_get_dumpit_one(struct sk_buff *rsp, struct netlink_callback *cb, + struct psp_dev *psd) +{ + if (psp_dev_check_access(psd, sock_net(rsp->sk))) + return 0; + + return psp_nl_stats_fill(psd, rsp, genl_info_dump(cb)); +} + +int psp_nl_get_stats_dumpit(struct sk_buff *rsp, struct netlink_callback *cb) +{ + struct psp_dev *psd; + int err = 0; + + mutex_lock(&psp_devs_lock); + xa_for_each_start(&psp_devs, cb->args[0], psd, cb->args[0]) { + mutex_lock(&psd->lock); + err = psp_nl_stats_get_dumpit_one(rsp, cb, psd); + mutex_unlock(&psd->lock); + if (err) + break; + } + mutex_unlock(&psp_devs_lock); + + return err; +} diff --git a/net/psp/psp_sock.c b/net/psp/psp_sock.c index a931d825d1cc4..f785672b7df66 100644 --- a/net/psp/psp_sock.c +++ b/net/psp/psp_sock.c @@ -253,8 +253,10 @@ void psp_assocs_key_rotated(struct psp_dev *psd) /* Mark the stale associations as invalid, they will no longer * be able to Rx any traffic. 
*/ - list_for_each_entry_safe(pas, next, &psd->prev_assocs, assocs_list) + list_for_each_entry_safe(pas, next, &psd->prev_assocs, assocs_list) { pas->generation |= ~PSP_GEN_VALID_MASK; + psd->stats.stales++; + } list_splice_init(&psd->prev_assocs, &psd->stale_assocs); list_splice_init(&psd->active_assocs, &psd->prev_assocs); From 2098cec32865422b3ddc6a1bd8beb055a3850451 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Wed, 5 Nov 2025 16:26:03 -0800 Subject: [PATCH 660/867] selftests: drv-net: psp: add assertions on core-tracked psp dev stats Add assertions to existing test cases to cover key rotations and 'stale-events'. Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251106002608.1578518-3-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/psp.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py index 4ae7a785ff10a..06559ef49b9a5 100755 --- a/tools/testing/selftests/drivers/net/psp.py +++ b/tools/testing/selftests/drivers/net/psp.py @@ -109,6 +109,10 @@ def _check_data_outq(s, exp_len, force_wait=False): time.sleep(0.01) ksft_eq(outq, exp_len) + +def _get_stat(cfg, key): + return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key] + # # Test case boiler plate # @@ -171,11 +175,16 @@ def dev_rotate(cfg): """ Test key rotation """ _init_psp_dev(cfg) + prev_rotations = _get_stat(cfg, 'key-rotations') + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) ksft_eq(rot['id'], cfg.psp_dev_id) rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) ksft_eq(rot['id'], cfg.psp_dev_id) + cur_rotations = _get_stat(cfg, 'key-rotations') + ksft_eq(cur_rotations, prev_rotations + 2) + def dev_rotate_spi(cfg): """ Test key rotation and SPI check """ @@ -475,6 +484,7 @@ def data_stale_key(cfg): """ Test send on a double-rotated key """ _init_psp_dev(cfg) + prev_stale = _get_stat(cfg, 'stale-events') s = _make_psp_conn(cfg) try: 
rx_assoc = cfg.pspnl.rx_assoc({"version": 0, @@ -495,6 +505,9 @@ def data_stale_key(cfg): cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cur_stale = _get_stat(cfg, 'stale-events') + ksft_gt(cur_stale, prev_stale) + s.send(b'0123456789' * 200) _check_data_outq(s, 2000, force_wait=True) finally: From f05d26198cf2c71f25f6bbe62ca4481c15543922 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 Nov 2025 16:26:04 -0800 Subject: [PATCH 661/867] psp: add stats from psp spec to driver facing api Provide a driver api for reporting device statistics required by the "Implementation Requirements" section of the PSP Architecture Specification. Use a warning to ensure drivers report stats required by the spec. Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251106002608.1578518-4-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/psp.yaml | 55 ++++++++++++++++++++++++++++ include/net/psp/types.h | 23 ++++++++++++ include/uapi/linux/psp.h | 8 ++++ net/psp/psp_main.c | 3 +- net/psp/psp_nl.c | 21 ++++++++++- 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml index 9141482213843..f3a57782d2cf4 100644 --- a/Documentation/netlink/specs/psp.yaml +++ b/Documentation/netlink/specs/psp.yaml @@ -98,6 +98,61 @@ attribute-sets: Number of times a socket's Rx got shut down due to using a key which went stale (fully rotated out). Kernel statistic. + - + name: rx-packets + type: uint + doc: | + Number of successfully processed and authenticated PSP packets. + Device statistic (from the PSP spec). + - + name: rx-bytes + type: uint + doc: | + Number of successfully authenticated PSP bytes received, counting from + the first byte after the IV through the last byte of payload. + The fixed initial portion of the PSP header (16 bytes) + and the PSP trailer/ICV (16 bytes) are not included in this count. 
+ Device statistic (from the PSP spec). + - + name: rx-auth-fail + type: uint + doc: | + Number of received PSP packets with unsuccessful authentication. + Device statistic (from the PSP spec). + - + name: rx-error + type: uint + doc: | + Number of received PSP packets with length/framing errors. + Device statistic (from the PSP spec). + - + name: rx-bad + type: uint + doc: | + Number of received PSP packets with miscellaneous errors + (invalid master key indicated by SPI, unsupported version, etc.) + Device statistic (from the PSP spec). + - + name: tx-packets + type: uint + doc: | + Number of successfully processed PSP packets for transmission. + Device statistic (from the PSP spec). + - + name: tx-bytes + type: uint + doc: | + Number of successfully processed PSP bytes for transmit, counting from + the first byte after the IV through the last byte of payload. + The fixed initial portion of the PSP header (16 bytes) + and the PSP trailer/ICV (16 bytes) are not included in this count. + Device statistic (from the PSP spec). + - + name: tx-error + type: uint + doc: | + Number of PSP packets for transmission with errors. + Device statistic (from the PSP spec). operations: list: diff --git a/include/net/psp/types.h b/include/net/psp/types.h index 5b0ccaac38825..25a9096d4e7da 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -150,6 +150,22 @@ struct psp_assoc { u8 drv_data[] __aligned(8); }; +struct psp_dev_stats { + union { + struct { + u64 rx_packets; + u64 rx_bytes; + u64 rx_auth_fail; + u64 rx_error; + u64 rx_bad; + u64 tx_packets; + u64 tx_bytes; + u64 tx_error; + }; + DECLARE_FLEX_ARRAY(u64, required); + }; +}; + /** * struct psp_dev_ops - netdev driver facing PSP callbacks */ @@ -188,6 +204,13 @@ struct psp_dev_ops { * Remove an association from the device. */ void (*tx_key_del)(struct psp_dev *psd, struct psp_assoc *pas); + + /** + * @get_stats: get statistics from the device + * Stats required by the spec must be maintained and filled in. 
+ * Stats must be filled in member-by-member, never memset the struct. + */ + void (*get_stats)(struct psp_dev *psd, struct psp_dev_stats *stats); }; #endif /* __NET_PSP_H */ diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index 31592760ad794..d8449c043ba11 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -49,6 +49,14 @@ enum { PSP_A_STATS_DEV_ID = 1, PSP_A_STATS_KEY_ROTATIONS, PSP_A_STATS_STALE_EVENTS, + PSP_A_STATS_RX_PACKETS, + PSP_A_STATS_RX_BYTES, + PSP_A_STATS_RX_AUTH_FAIL, + PSP_A_STATS_RX_ERROR, + PSP_A_STATS_RX_BAD, + PSP_A_STATS_TX_PACKETS, + PSP_A_STATS_TX_BYTES, + PSP_A_STATS_TX_ERROR, __PSP_A_STATS_MAX, PSP_A_STATS_MAX = (__PSP_A_STATS_MAX - 1) diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index 481aaf0fc9fcf..a8534124f6266 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -60,7 +60,8 @@ psp_dev_create(struct net_device *netdev, !psd_ops->key_rotate || !psd_ops->rx_spi_alloc || !psd_ops->tx_key_add || - !psd_ops->tx_key_del)) + !psd_ops->tx_key_del || + !psd_ops->get_stats)) return ERR_PTR(-EINVAL); psd = kzalloc(sizeof(*psd), GFP_KERNEL); diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c index f990cccbe99c1..6afd7707ec12e 100644 --- a/net/psp/psp_nl.c +++ b/net/psp/psp_nl.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -509,7 +510,17 @@ static int psp_nl_stats_fill(struct psp_dev *psd, struct sk_buff *rsp, const struct genl_info *info) { + unsigned int required_cnt = sizeof(struct psp_dev_stats) / sizeof(u64); + struct psp_dev_stats stats; void *hdr; + int i; + + memset(&stats, 0xff, sizeof(stats)); + psd->ops->get_stats(psd, &stats); + + for (i = 0; i < required_cnt; i++) + if (WARN_ON_ONCE(stats.required[i] == ETHTOOL_STAT_NOT_SET)) + return -EOPNOTSUPP; hdr = genlmsg_iput(rsp, info); if (!hdr) @@ -518,7 +529,15 @@ psp_nl_stats_fill(struct psp_dev *psd, struct sk_buff *rsp, if (nla_put_u32(rsp, PSP_A_STATS_DEV_ID, psd->id) || nla_put_uint(rsp, 
PSP_A_STATS_KEY_ROTATIONS, psd->stats.rotations) || - nla_put_uint(rsp, PSP_A_STATS_STALE_EVENTS, psd->stats.stales)) + nla_put_uint(rsp, PSP_A_STATS_STALE_EVENTS, psd->stats.stales) || + nla_put_uint(rsp, PSP_A_STATS_RX_PACKETS, stats.rx_packets) || + nla_put_uint(rsp, PSP_A_STATS_RX_BYTES, stats.rx_bytes) || + nla_put_uint(rsp, PSP_A_STATS_RX_AUTH_FAIL, stats.rx_auth_fail) || + nla_put_uint(rsp, PSP_A_STATS_RX_ERROR, stats.rx_error) || + nla_put_uint(rsp, PSP_A_STATS_RX_BAD, stats.rx_bad) || + nla_put_uint(rsp, PSP_A_STATS_TX_PACKETS, stats.tx_packets) || + nla_put_uint(rsp, PSP_A_STATS_TX_BYTES, stats.tx_bytes) || + nla_put_uint(rsp, PSP_A_STATS_TX_ERROR, stats.tx_error)) goto err_cancel_msg; genlmsg_end(rsp, hdr); From b1346219e5350990ab36cdf8d1b302c1b53fc21a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 Nov 2025 16:26:05 -0800 Subject: [PATCH 662/867] net/mlx5e: Add PSP stats support for Rx/Tx flows Add all statistics described under the "Implementation Requirements" section of the PSP Architecture Specification: Rx successfully decrypted PSP packets: psp_rx_pkts : Number of packets decrypted successfully psp_rx_bytes : Number of bytes decrypted successfully Rx PSP authentication failure statistics: psp_rx_pkts_auth_fail : Number of PSP packets that failed authentication psp_rx_bytes_auth_fail : Number of PSP bytes that failed authentication Rx PSP bad frame error statistics: psp_rx_pkts_frame_err; psp_rx_bytes_frame_err; Rx PSP drop statistics: psp_rx_pkts_drop : Number of PSP packets dropped psp_rx_bytes_drop : Number of PSP bytes dropped Tx successfully encrypted PSP packets: psp_tx_pkts : Number of packets encrypted successfully psp_tx_bytes : Number of bytes encrypted successfully Tx drops: tx_drop : Number of misc psp related drops The above can be seen using the ynl cli: ./pyynl/cli.py --spec netlink/specs/psp.yaml --dump get-stats Signed-off-by: Raed Salem Signed-off-by: Rahul Rameshbabu Signed-off-by: Daniel Zahka Link: 
https://patch.msgid.link/20251106002608.1578518-5-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/psp.c | 233 ++++++++++++++++-- .../mellanox/mlx5/core/en_accel/psp.h | 16 ++ .../mellanox/mlx5/core/en_accel/psp_rxtx.c | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 5 + 4 files changed, 240 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 8565cfe8d7dce..38e7c77cc8514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -28,12 +28,15 @@ struct mlx5e_psp_tx { struct mlx5_flow_handle *rule; struct mutex mutex; /* Protect PSP TX steering */ u32 refcnt; + struct mlx5_fc *tx_counter; }; struct mlx5e_psp_rx_err { struct mlx5_flow_table *ft; struct mlx5_flow_handle *rule; - struct mlx5_flow_handle *drop_rule; + struct mlx5_flow_handle *auth_fail_rule; + struct mlx5_flow_handle *err_rule; + struct mlx5_flow_handle *bad_rule; struct mlx5_modify_hdr *copy_modify_hdr; }; @@ -50,6 +53,10 @@ struct mlx5e_accel_fs_psp_prot { struct mlx5e_accel_fs_psp { struct mlx5e_accel_fs_psp_prot fs_prot[ACCEL_FS_PSP_NUM_TYPES]; + struct mlx5_fc *rx_counter; + struct mlx5_fc *rx_auth_fail_counter; + struct mlx5_fc *rx_err_counter; + struct mlx5_fc *rx_bad_counter; }; struct mlx5e_psp_fs { @@ -72,9 +79,19 @@ static enum mlx5_traffic_types fs_psp2tt(enum accel_fs_psp_type i) static void accel_psp_fs_rx_err_del_rules(struct mlx5e_psp_fs *fs, struct mlx5e_psp_rx_err *rx_err) { - if (rx_err->drop_rule) { - mlx5_del_flow_rules(rx_err->drop_rule); - rx_err->drop_rule = NULL; + if (rx_err->bad_rule) { + mlx5_del_flow_rules(rx_err->bad_rule); + rx_err->bad_rule = NULL; + } + + if (rx_err->err_rule) { + mlx5_del_flow_rules(rx_err->err_rule); + rx_err->err_rule = NULL; + } + + if (rx_err->auth_fail_rule) { + mlx5_del_flow_rules(rx_err->auth_fail_rule); + 
rx_err->auth_fail_rule = NULL; } if (rx_err->rule) { @@ -117,6 +134,7 @@ static int accel_psp_fs_rx_err_add_rule(struct mlx5e_psp_fs *fs, { u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_core_dev *mdev = fs->mdev; + struct mlx5_flow_destination dest[2]; struct mlx5_flow_act flow_act = {}; struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *fte; @@ -147,10 +165,14 @@ static int accel_psp_fs_rx_err_add_rule(struct mlx5e_psp_fs *fs, accel_psp_setup_syndrome_match(spec, PSP_OK); /* create fte */ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; flow_act.modify_hdr = modify_hdr; - fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, - &fs_prot->default_dest, 1); + dest[0].type = fs_prot->default_dest.type; + dest[0].ft = fs_prot->default_dest.ft; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter = fs->rx_fs->rx_counter; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, dest, 2); if (IS_ERR(fte)) { err = PTR_ERR(fte); mlx5_core_err(mdev, "fail to add psp rx err copy rule err=%d\n", err); @@ -158,22 +180,69 @@ static int accel_psp_fs_rx_err_add_rule(struct mlx5e_psp_fs *fs, } rx_err->rule = fte; - /* add default drop rule */ + /* add auth fail drop rule */ memset(spec, 0, sizeof(*spec)); memset(&flow_act, 0, sizeof(flow_act)); + accel_psp_setup_syndrome_match(spec, PSP_ICV_FAIL); /* create fte */ - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; - fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, NULL, 0); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[0].counter = fs->rx_fs->rx_auth_fail_counter; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, dest, 1); if (IS_ERR(fte)) { err = PTR_ERR(fte); - mlx5_core_err(mdev, "fail to add psp rx err drop rule err=%d\n", err); + 
mlx5_core_err(mdev, "fail to add psp rx auth fail drop rule err=%d\n", + err); goto out_drop_rule; } - rx_err->drop_rule = fte; + rx_err->auth_fail_rule = fte; + + /* add framing drop rule */ + memset(spec, 0, sizeof(*spec)); + memset(&flow_act, 0, sizeof(flow_act)); + accel_psp_setup_syndrome_match(spec, PSP_BAD_TRAILER); + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[0].counter = fs->rx_fs->rx_err_counter; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, dest, 1); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + mlx5_core_err(mdev, "fail to add psp rx framing err drop rule err=%d\n", + err); + goto out_drop_auth_fail_rule; + } + rx_err->err_rule = fte; + + /* add misc. errors drop rule */ + memset(spec, 0, sizeof(*spec)); + memset(&flow_act, 0, sizeof(flow_act)); + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[0].counter = fs->rx_fs->rx_bad_counter; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, dest, 1); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + mlx5_core_err(mdev, "fail to add psp rx misc. 
err drop rule err=%d\n", + err); + goto out_drop_error_rule; + } + rx_err->bad_rule = fte; + rx_err->copy_modify_hdr = modify_hdr; goto out_spec; +out_drop_error_rule: + mlx5_del_flow_rules(rx_err->err_rule); + rx_err->err_rule = NULL; +out_drop_auth_fail_rule: + mlx5_del_flow_rules(rx_err->auth_fail_rule); + rx_err->auth_fail_rule = NULL; out_drop_rule: mlx5_del_flow_rules(rx_err->rule); rx_err->rule = NULL; @@ -461,6 +530,10 @@ static void accel_psp_fs_cleanup_rx(struct mlx5e_psp_fs *fs) return; accel_psp = fs->rx_fs; + mlx5_fc_destroy(fs->mdev, accel_psp->rx_bad_counter); + mlx5_fc_destroy(fs->mdev, accel_psp->rx_err_counter); + mlx5_fc_destroy(fs->mdev, accel_psp->rx_auth_fail_counter); + mlx5_fc_destroy(fs->mdev, accel_psp->rx_counter); for (i = 0; i < ACCEL_FS_PSP_NUM_TYPES; i++) { fs_prot = &accel_psp->fs_prot[i]; mutex_destroy(&fs_prot->prot_mutex); @@ -474,7 +547,10 @@ static int accel_psp_fs_init_rx(struct mlx5e_psp_fs *fs) { struct mlx5e_accel_fs_psp_prot *fs_prot; struct mlx5e_accel_fs_psp *accel_psp; + struct mlx5_core_dev *mdev = fs->mdev; + struct mlx5_fc *flow_counter; enum accel_fs_psp_type i; + int err; accel_psp = kzalloc(sizeof(*accel_psp), GFP_KERNEL); if (!accel_psp) @@ -485,9 +561,68 @@ static int accel_psp_fs_init_rx(struct mlx5e_psp_fs *fs) mutex_init(&fs_prot->prot_mutex); } + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + mlx5_core_warn(mdev, + "fail to create psp rx flow counter err=%pe\n", + flow_counter); + err = PTR_ERR(flow_counter); + goto out_err; + } + accel_psp->rx_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + mlx5_core_warn(mdev, + "fail to create psp rx auth fail flow counter err=%pe\n", + flow_counter); + err = PTR_ERR(flow_counter); + goto out_counter_err; + } + accel_psp->rx_auth_fail_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + mlx5_core_warn(mdev, + "fail to create psp rx error 
flow counter err=%pe\n", + flow_counter); + err = PTR_ERR(flow_counter); + goto out_auth_fail_counter_err; + } + accel_psp->rx_err_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + mlx5_core_warn(mdev, + "fail to create psp rx bad flow counter err=%pe\n", + flow_counter); + err = PTR_ERR(flow_counter); + goto out_err_counter_err; + } + accel_psp->rx_bad_counter = flow_counter; + fs->rx_fs = accel_psp; return 0; + +out_err_counter_err: + mlx5_fc_destroy(mdev, accel_psp->rx_err_counter); + accel_psp->rx_err_counter = NULL; +out_auth_fail_counter_err: + mlx5_fc_destroy(mdev, accel_psp->rx_auth_fail_counter); + accel_psp->rx_auth_fail_counter = NULL; +out_counter_err: + mlx5_fc_destroy(mdev, accel_psp->rx_counter); + accel_psp->rx_counter = NULL; +out_err: + for (i = 0; i < ACCEL_FS_PSP_NUM_TYPES; i++) { + fs_prot = &accel_psp->fs_prot[i]; + mutex_destroy(&fs_prot->prot_mutex); + } + kfree(accel_psp); + fs->rx_fs = NULL; + + return err; } void mlx5_accel_psp_fs_cleanup_rx_tables(struct mlx5e_priv *priv) @@ -532,6 +667,7 @@ static int accel_psp_fs_tx_create_ft_table(struct mlx5e_psp_fs *fs) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; struct mlx5_core_dev *mdev = fs->mdev; struct mlx5_flow_act flow_act = {}; u32 *in, *mc, *outer_headers_c; @@ -580,8 +716,11 @@ static int accel_psp_fs_tx_create_ft_table(struct mlx5e_psp_fs *fs) flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP; flow_act.flags |= FLOW_ACT_NO_APPEND; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = tx_fs->tx_counter; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err 
= PTR_ERR(rule); mlx5_core_err(mdev, "PSP: fail to add psp tx flow rule, err = %d\n", err); @@ -650,6 +789,7 @@ static void accel_psp_fs_cleanup_tx(struct mlx5e_psp_fs *fs) if (!tx_fs) return; + mlx5_fc_destroy(fs->mdev, tx_fs->tx_counter); mutex_destroy(&tx_fs->mutex); WARN_ON(tx_fs->refcnt); kfree(tx_fs); @@ -658,10 +798,12 @@ static void accel_psp_fs_cleanup_tx(struct mlx5e_psp_fs *fs) static int accel_psp_fs_init_tx(struct mlx5e_psp_fs *fs) { + struct mlx5_core_dev *mdev = fs->mdev; struct mlx5_flow_namespace *ns; + struct mlx5_fc *flow_counter; struct mlx5e_psp_tx *tx_fs; - ns = mlx5_get_flow_namespace(fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); if (!ns) return -EOPNOTSUPP; @@ -669,12 +811,55 @@ static int accel_psp_fs_init_tx(struct mlx5e_psp_fs *fs) if (!tx_fs) return -ENOMEM; + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + mlx5_core_warn(mdev, + "fail to create psp tx flow counter err=%pe\n", + flow_counter); + kfree(tx_fs); + return PTR_ERR(flow_counter); + } + tx_fs->tx_counter = flow_counter; mutex_init(&tx_fs->mutex); tx_fs->ns = ns; fs->tx_fs = tx_fs; return 0; } +static void +mlx5e_accel_psp_fs_get_stats_fill(struct mlx5e_priv *priv, + struct mlx5e_psp_stats *stats) +{ + struct mlx5e_psp_tx *tx_fs = priv->psp->fs->tx_fs; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_accel_fs_psp *accel_psp; + + accel_psp = (struct mlx5e_accel_fs_psp *)priv->psp->fs->rx_fs; + + if (tx_fs->tx_counter) + mlx5_fc_query(mdev, tx_fs->tx_counter, &stats->psp_tx_pkts, + &stats->psp_tx_bytes); + + if (accel_psp->rx_counter) + mlx5_fc_query(mdev, accel_psp->rx_counter, &stats->psp_rx_pkts, + &stats->psp_rx_bytes); + + if (accel_psp->rx_auth_fail_counter) + mlx5_fc_query(mdev, accel_psp->rx_auth_fail_counter, + &stats->psp_rx_pkts_auth_fail, + &stats->psp_rx_bytes_auth_fail); + + if (accel_psp->rx_err_counter) + mlx5_fc_query(mdev, accel_psp->rx_err_counter, + 
&stats->psp_rx_pkts_frame_err, + &stats->psp_rx_bytes_frame_err); + + if (accel_psp->rx_bad_counter) + mlx5_fc_query(mdev, accel_psp->rx_bad_counter, + &stats->psp_rx_pkts_drop, + &stats->psp_rx_bytes_drop); +} + void mlx5_accel_psp_fs_cleanup_tx_tables(struct mlx5e_priv *priv) { if (!priv->psp) @@ -849,12 +1034,30 @@ mlx5e_psp_key_rotate(struct psp_dev *psd, struct netlink_ext_ack *exack) return mlx5e_psp_rotate_key(priv->mdev); } +static void +mlx5e_psp_get_stats(struct psp_dev *psd, struct psp_dev_stats *stats) +{ + struct mlx5e_priv *priv = netdev_priv(psd->main_netdev); + struct mlx5e_psp_stats nstats; + + mlx5e_accel_psp_fs_get_stats_fill(priv, &nstats); + stats->rx_packets = nstats.psp_rx_pkts; + stats->rx_bytes = nstats.psp_rx_bytes; + stats->rx_auth_fail = nstats.psp_rx_pkts_auth_fail; + stats->rx_error = nstats.psp_rx_pkts_frame_err; + stats->rx_bad = nstats.psp_rx_pkts_drop; + stats->tx_packets = nstats.psp_tx_pkts; + stats->tx_bytes = nstats.psp_tx_bytes; + stats->tx_error = atomic_read(&priv->psp->tx_drop); +} + static struct psp_dev_ops mlx5_psp_ops = { .set_config = mlx5e_psp_set_config, .rx_spi_alloc = mlx5e_psp_rx_spi_alloc, .tx_key_add = mlx5e_psp_assoc_add, .tx_key_del = mlx5e_psp_assoc_del, .key_rotate = mlx5e_psp_key_rotate, + .get_stats = mlx5e_psp_get_stats, }; void mlx5e_psp_unregister(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h index 42bb671fb2cb3..6b62fef0d9a73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h @@ -7,11 +7,27 @@ #include #include "en.h" +struct mlx5e_psp_stats { + u64 psp_rx_pkts; + u64 psp_rx_bytes; + u64 psp_rx_pkts_auth_fail; + u64 psp_rx_bytes_auth_fail; + u64 psp_rx_pkts_frame_err; + u64 psp_rx_bytes_frame_err; + u64 psp_rx_pkts_drop; + u64 psp_rx_bytes_drop; + u64 psp_tx_pkts; + u64 psp_tx_bytes; + u64 psp_tx_pkts_drop; + u64 
psp_tx_bytes_drop; +}; + struct mlx5e_psp { struct psp_dev *psp; struct psp_dev_caps caps; struct mlx5e_psp_fs *fs; atomic_t tx_key_cnt; + atomic_t tx_drop; }; static inline bool mlx5_is_psp_device(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c index 828bff1137aff..c17ea0fcd8efe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c @@ -186,6 +186,7 @@ bool mlx5e_psp_handle_tx_skb(struct net_device *netdev, /* psp_encap of the packet */ if (!psp_dev_encapsulate(net, skb, psp_st->spi, psp_st->ver, 0)) { kfree_skb_reason(skb, SKB_DROP_REASON_PSP_OUTPUT); + atomic_inc(&priv->psp->tx_drop); return false; } if (skb_is_gso(skb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8e394b16c92b9..5ec0f5ca45b4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4025,6 +4025,11 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) s->rx_bytes += rq_stats->bytes; s->multicast += rq_stats->mcast_packets; } + +#ifdef CONFIG_MLX5_EN_PSP + if (priv->psp) + s->tx_dropped += atomic_read(&priv->psp->tx_drop); +#endif } void From 178f0763c5f3cf710062ddf2e4d659127fee66b2 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Wed, 5 Nov 2025 16:26:06 -0800 Subject: [PATCH 663/867] netdevsim: implement psp device stats For now only tx/rx packets/bytes are reported. This is not compliant with the PSP Architecture Specification. 
Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251106002608.1578518-6-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdevsim.h | 5 +++++ drivers/net/netdevsim/psp.c | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 02c1c97b70080..af6fcfcda8ba8 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -109,6 +109,11 @@ struct netdevsim { int rq_reset_mode; struct { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; struct psp_dev *dev; u32 spi; u32 assoc_cnt; diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c index 332b5b744f016..727da06101caa 100644 --- a/drivers/net/netdevsim/psp.c +++ b/drivers/net/netdevsim/psp.c @@ -70,6 +70,13 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, *psp_ext = skb->extensions; refcount_inc(&(*psp_ext)->refcnt); skb->decrypted = 1; + + u64_stats_update_begin(&ns->psp.syncp); + ns->psp.tx_packets++; + ns->psp.rx_packets++; + ns->psp.tx_bytes += skb->len - skb_inner_transport_offset(skb); + ns->psp.rx_bytes += skb->len - skb_inner_transport_offset(skb); + u64_stats_update_end(&ns->psp.syncp); } else { struct ipv6hdr *ip6h __maybe_unused; struct iphdr *iph; @@ -164,12 +171,32 @@ static void nsim_assoc_del(struct psp_dev *psd, struct psp_assoc *pas) ns->psp.assoc_cnt--; } +static void nsim_get_stats(struct psp_dev *psd, struct psp_dev_stats *stats) +{ + struct netdevsim *ns = psd->drv_priv; + unsigned int start; + + /* WARNING: do *not* blindly zero stats in real drivers! + * All required stats must be reported by the device! 
+ */ + memset(stats, 0, sizeof(struct psp_dev_stats)); + + do { + start = u64_stats_fetch_begin(&ns->psp.syncp); + stats->rx_bytes = ns->psp.rx_bytes; + stats->rx_packets = ns->psp.rx_packets; + stats->tx_bytes = ns->psp.tx_bytes; + stats->tx_packets = ns->psp.tx_packets; + } while (u64_stats_fetch_retry(&ns->psp.syncp, start)); +} + static struct psp_dev_ops nsim_psp_ops = { .set_config = nsim_psp_set_config, .rx_spi_alloc = nsim_rx_spi_alloc, .tx_key_add = nsim_assoc_add, .tx_key_del = nsim_assoc_del, .key_rotate = nsim_key_rotate, + .get_stats = nsim_get_stats, }; static struct psp_dev_caps nsim_psp_caps = { From ad17e7e92a7c52ce70bb764813fcf99464f96903 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 6 Nov 2025 10:14:21 +0800 Subject: [PATCH 664/867] net: fec: correct rx_bytes statistic for the case SHIFT16 is set Two additional bytes are written in front of each frame received into the RX FIFO if SHIFT16 is set, so we need to subtract the extra two bytes from pkt_len to correct the rx_bytes statistic.
Fixes: 3ac72b7b63d5 ("net: fec: align IP header in hardware") Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20251106021421.2096585-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1edcfaee6819e..3222359ac15b7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1835,6 +1835,8 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) ndev->stats.rx_packets++; pkt_len = fec16_to_cpu(bdp->cbd_datlen); ndev->stats.rx_bytes += pkt_len; + if (fep->quirks & FEC_QUIRK_HAS_RACC) + ndev->stats.rx_bytes -= 2; index = fec_enet_get_bd_index(bdp, &rxq->bd); page = rxq->rx_skb_info[index].page; From fd9557c3606bb683c01a6c7627e915b539b9a8df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 08:55:00 +0000 Subject: [PATCH 665/867] net: add prefetch() in skb_defer_free_flush() skb_defer_free_flush() is becoming more important these days. Add a prefetch operation to reduce latency a bit on some platforms like AMD EPYC 7B12. On more recent cpus, a stall happens when reading skb_shinfo(). Avoiding it will require a more elaborate strategy. 
Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://patch.msgid.link/20251106085500.2438951-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index 537aa43edff0e..69515edd17bc6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6782,6 +6782,7 @@ static void skb_defer_free_flush(void) free_list = llist_del_all(&sdn->defer_list); llist_for_each_entry_safe(skb, next, free_list, ll_node) { + prefetch(next); napi_consume_skb(skb, 1); } } From 96a9178a29a6b84bb632ebeb4e84cf61191c73d5 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 6 Nov 2025 10:06:37 +0100 Subject: [PATCH 666/867] net: phy: micrel: lan8814 fix reset of the QSGMII interface The lan8814 is a quad-phy and it is using QSGMII towards the MAC. The problem is that every time one of the ports is configured, the PCS is reset for all the PHYs, meaning that the other ports can lose traffic until the link is established again. To fix this, do the reset one time for the entire PHY package.
Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Reviewed-by: Divya Koppera Link: https://patch.msgid.link/20251106090637.2030625-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 6a1a424e3b30f..01c87c9b77020 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4380,12 +4380,6 @@ static int lan8814_config_init(struct phy_device *phydev) { struct kszphy_priv *lan8814 = phydev->priv; - /* Reset the PHY */ - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, - LAN8814_QSGMII_SOFT_RESET, - LAN8814_QSGMII_SOFT_RESET_BIT, - LAN8814_QSGMII_SOFT_RESET_BIT); - /* Disable ANEG with QSGMII PCS Host side */ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, @@ -4471,6 +4465,12 @@ static int lan8814_probe(struct phy_device *phydev) addr, sizeof(struct lan8814_shared_priv)); if (phy_package_init_once(phydev)) { + /* Reset the PHY */ + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + err = lan8814_release_coma_mode(phydev); if (err) return err; From 1fcf572211da626982223ab4e4641bcd92db0701 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 20:29:33 +0000 Subject: [PATCH 667/867] net: allow skb_release_head_state() to be called multiple times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, only skb dst is cleared (thanks to skb_dst_drop()). Make sure skb->destructor, conntrack and extensions are cleared.
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20251106202935.1776179-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b4bc8b1c7d56..eeddb9e737ff2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1149,11 +1149,10 @@ void skb_release_head_state(struct sk_buff *skb) skb); #endif + skb->destructor = NULL; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb_nfct(skb)); -#endif - skb_ext_put(skb); + nf_reset_ct(skb); + skb_ext_reset(skb); } /* Free everything but the sk_buff shell. */ From e20dfbad8aab2b7c72571ae3c3e2e646d6b04cb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 20:29:34 +0000 Subject: [PATCH 668/867] net: fix napi_consume_skb() with alien skbs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a lack of NUMA awareness and more generally lack of slab caches affinity on TX completion path. Modern drivers are using napi_consume_skb(), hoping to cache sk_buff in per-cpu caches so that they can be recycled in RX path. Only use this if the skb was allocated on the same cpu, otherwise use skb_attempt_defer_free() so that the skb is freed on the original cpu. This removes contention on SLUB spinlocks and data structures. After this patch, I get ~50% improvement for an UDP tx workload on an AMD EPYC 9B45 (IDPF 200Gbit NIC with 32 TX queues). 80 Mpps -> 120 Mpps. 
Profiling one of the 32 cpus servicing NIC interrupts : Before: mpstat -P 511 1 1 Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: 511 0.00 0.00 0.00 0.00 0.00 98.00 0.00 0.00 0.00 2.00 31.01% ksoftirqd/511 [kernel.kallsyms] [k] queued_spin_lock_slowpath 12.45% swapper [kernel.kallsyms] [k] queued_spin_lock_slowpath 5.60% ksoftirqd/511 [kernel.kallsyms] [k] __slab_free 3.31% ksoftirqd/511 [kernel.kallsyms] [k] idpf_tx_clean_buf_ring 3.27% ksoftirqd/511 [kernel.kallsyms] [k] idpf_tx_splitq_clean_all 2.95% ksoftirqd/511 [kernel.kallsyms] [k] idpf_tx_splitq_start 2.52% ksoftirqd/511 [kernel.kallsyms] [k] fq_dequeue 2.32% ksoftirqd/511 [kernel.kallsyms] [k] read_tsc 2.25% ksoftirqd/511 [kernel.kallsyms] [k] build_detached_freelist 2.15% ksoftirqd/511 [kernel.kallsyms] [k] kmem_cache_free 2.11% swapper [kernel.kallsyms] [k] __slab_free 2.06% ksoftirqd/511 [kernel.kallsyms] [k] idpf_features_check 2.01% ksoftirqd/511 [kernel.kallsyms] [k] idpf_tx_splitq_clean_hdr 1.97% ksoftirqd/511 [kernel.kallsyms] [k] skb_release_data 1.52% ksoftirqd/511 [kernel.kallsyms] [k] sock_wfree 1.34% swapper [kernel.kallsyms] [k] idpf_tx_clean_buf_ring 1.23% swapper [kernel.kallsyms] [k] idpf_tx_splitq_clean_all 1.15% ksoftirqd/511 [kernel.kallsyms] [k] dma_unmap_page_attrs 1.11% swapper [kernel.kallsyms] [k] idpf_tx_splitq_start 1.03% swapper [kernel.kallsyms] [k] fq_dequeue 0.94% swapper [kernel.kallsyms] [k] kmem_cache_free 0.93% swapper [kernel.kallsyms] [k] read_tsc 0.81% ksoftirqd/511 [kernel.kallsyms] [k] napi_consume_skb 0.79% swapper [kernel.kallsyms] [k] idpf_tx_splitq_clean_hdr 0.77% ksoftirqd/511 [kernel.kallsyms] [k] skb_free_head 0.76% swapper [kernel.kallsyms] [k] idpf_features_check 0.72% swapper [kernel.kallsyms] [k] skb_release_data 0.69% swapper [kernel.kallsyms] [k] build_detached_freelist 0.58% ksoftirqd/511 [kernel.kallsyms] [k] skb_release_head_state 0.56% ksoftirqd/511 [kernel.kallsyms] [k] __put_partials 0.55% ksoftirqd/511 
[kernel.kallsyms] [k] kmem_cache_free_bulk 0.48% swapper [kernel.kallsyms] [k] sock_wfree After: mpstat -P 511 1 1 Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: 511 0.00 0.00 0.00 0.00 0.00 51.49 0.00 0.00 0.00 48.51 19.10% swapper [kernel.kallsyms] [k] idpf_tx_splitq_clean_hdr 13.86% swapper [kernel.kallsyms] [k] idpf_tx_clean_buf_ring 10.80% swapper [kernel.kallsyms] [k] skb_attempt_defer_free 10.57% swapper [kernel.kallsyms] [k] idpf_tx_splitq_clean_all 7.18% swapper [kernel.kallsyms] [k] queued_spin_lock_slowpath 6.69% swapper [kernel.kallsyms] [k] sock_wfree 5.55% swapper [kernel.kallsyms] [k] dma_unmap_page_attrs 3.10% swapper [kernel.kallsyms] [k] fq_dequeue 3.00% swapper [kernel.kallsyms] [k] skb_release_head_state 2.73% swapper [kernel.kallsyms] [k] read_tsc 2.48% swapper [kernel.kallsyms] [k] idpf_tx_splitq_start 1.20% swapper [kernel.kallsyms] [k] idpf_features_check 1.13% swapper [kernel.kallsyms] [k] napi_consume_skb 0.93% swapper [kernel.kallsyms] [k] idpf_vport_splitq_napi_poll 0.64% swapper [kernel.kallsyms] [k] native_send_call_func_single_ipi 0.60% swapper [kernel.kallsyms] [k] acpi_processor_ffh_cstate_enter 0.53% swapper [kernel.kallsyms] [k] io_idle 0.43% swapper [kernel.kallsyms] [k] netif_skb_features 0.41% swapper [kernel.kallsyms] [k] __direct_call_cpuidle_state_enter2 0.40% swapper [kernel.kallsyms] [k] native_irq_return_iret 0.40% swapper [kernel.kallsyms] [k] idpf_tx_buf_hw_update 0.36% swapper [kernel.kallsyms] [k] sched_clock_noinstr 0.34% swapper [kernel.kallsyms] [k] handle_softirqs 0.32% swapper [kernel.kallsyms] [k] net_rx_action 0.32% swapper [kernel.kallsyms] [k] dql_completed 0.32% swapper [kernel.kallsyms] [k] validate_xmit_skb 0.31% swapper [kernel.kallsyms] [k] skb_network_protocol 0.29% swapper [kernel.kallsyms] [k] skb_csum_hwoffload_help 0.29% swapper [kernel.kallsyms] [k] x2apic_send_IPI 0.28% swapper [kernel.kallsyms] [k] ktime_get 0.24% swapper [kernel.kallsyms] [k] __qdisc_run 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Jason Xing Link: https://patch.msgid.link/20251106202935.1776179-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eeddb9e737ff2..7ac5f8aa1235a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1476,6 +1476,11 @@ void napi_consume_skb(struct sk_buff *skb, int budget) DEBUG_NET_WARN_ON_ONCE(!in_softirq()); + if (skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) { + skb_release_head_state(skb); + return skb_attempt_defer_free(skb); + } + if (!skb_unref(skb)) return; From b61785852ed0a0e7dc16b606157e4a0228cd76cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 20:29:35 +0000 Subject: [PATCH 669/867] net: increase skb_defer_max default to 128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skb_defer_max value is very conservative, and can be increased to avoid too many calls to kick_defer_list_purge(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Jason Xing Link: https://patch.msgid.link/20251106202935.1776179-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/admin-guide/sysctl/net.rst | 4 ++-- net/core/hotdata.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 991773dcb9cfe..369a738a68193 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -355,9 +355,9 @@ skb_defer_max ------------- Max size (in skbs) of the per-cpu list of skbs being freed -by the cpu which allocated them. Used by TCP stack so far. +by the cpu which allocated them. 
-Default: 64 +Default: 128 optmem_max ---------- diff --git a/net/core/hotdata.c b/net/core/hotdata.c index 95d0a4df10069..dddd5c287cf08 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -20,7 +20,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .dev_tx_weight = 64, .dev_rx_weight = 64, .sysctl_max_skb_frags = MAX_SKB_FRAGS, - .sysctl_skb_defer_max = 64, + .sysctl_skb_defer_max = 128, .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE }; EXPORT_SYMBOL(net_hotdata); From 5636fcdb02114f492dd922cef67d1b1b5f09893b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:11 +0000 Subject: [PATCH 670/867] net: stmmac: lpc18xx: convert to PHY_INTF_SEL_x Use the common dwmac definitions for the PHY interface selection field. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy59-0000000DhQ1-393H@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 6fffc9dfbae55..66c309a7afb37 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -22,8 +22,8 @@ /* Register defines for CREG syscon */ #define LPC18XX_CREG_CREG6 0x12c # define LPC18XX_CREG_CREG6_ETHMODE_MASK 0x7 -# define LPC18XX_CREG_CREG6_ETHMODE_MII 0x0 -# define LPC18XX_CREG_CREG6_ETHMODE_RMII 0x4 +# define LPC18XX_CREG_CREG6_ETHMODE_MII PHY_INTF_SEL_GMII_MII +# define LPC18XX_CREG_CREG6_ETHMODE_RMII PHY_INTF_SEL_RMII static int lpc18xx_dwmac_probe(struct platform_device *pdev) { From eb0533c7e63be03c1220f24bd00aeb2a4f0e3f78 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:16 +0000 Subject: [PATCH 671/867] net: stmmac: lpc18xx: use PHY_INTF_SEL_x directly Use the PHY_INTF_SEL_x values directly rather than the driver private 
LPC18XX_CREG_CREG6_ETHMODE_x definitions, and convert LPC18XX_CREG_CREG6_ETHMODE_MASK to use GENMASK(). Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5E-0000000DhQ7-3cuy@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 66c309a7afb37..895d16dc0a4b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -21,9 +21,7 @@ /* Register defines for CREG syscon */ #define LPC18XX_CREG_CREG6 0x12c -# define LPC18XX_CREG_CREG6_ETHMODE_MASK 0x7 -# define LPC18XX_CREG_CREG6_ETHMODE_MII PHY_INTF_SEL_GMII_MII -# define LPC18XX_CREG_CREG6_ETHMODE_RMII PHY_INTF_SEL_RMII +# define LPC18XX_CREG_CREG6_ETHMODE_MASK GENMASK(2, 0) static int lpc18xx_dwmac_probe(struct platform_device *pdev) { @@ -50,9 +48,9 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) } if (plat_dat->phy_interface == PHY_INTERFACE_MODE_MII) { - ethmode = LPC18XX_CREG_CREG6_ETHMODE_MII; + ethmode = PHY_INTF_SEL_GMII_MII; } else if (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII) { - ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII; + ethmode = PHY_INTF_SEL_RMII; } else { dev_err(&pdev->dev, "Only MII and RMII mode supported\n"); return -EINVAL; From 9882f12194082a0178a025946911e677cfdcd8d8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:21 +0000 Subject: [PATCH 672/867] net: stmmac: lpc18xx: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, and use the result to program the ethernet mode. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5J-0000000DhQD-46Ob@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 895d16dc0a4b5..0f6be2a17e659 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -47,15 +47,14 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) return PTR_ERR(reg); } - if (plat_dat->phy_interface == PHY_INTERFACE_MODE_MII) { - ethmode = PHY_INTF_SEL_GMII_MII; - } else if (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII) { - ethmode = PHY_INTF_SEL_RMII; - } else { + if (plat_dat->phy_interface != PHY_INTERFACE_MODE_MII && + plat_dat->phy_interface != PHY_INTERFACE_MODE_RMII) { dev_err(&pdev->dev, "Only MII and RMII mode supported\n"); return -EINVAL; } + ethmode = stmmac_get_phy_intf_sel(plat_dat->phy_interface); + regmap_update_bits(reg, LPC18XX_CREG_CREG6, LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode); From 4bad4219249f12dabb5c246f5cf9378f3693e226 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:27 +0000 Subject: [PATCH 673/867] net: stmmac: lpc18xx: validate phy_intf_sel Validate the phy_intf_sel value rather than the PHY interface mode. This will allow us to transition to the ->set_phy_intf_sel() method. Note that this will allow GMII as well as MII as the phy_intf_sel value is the same for both. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5P-0000000DhQJ-0Oi3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 0f6be2a17e659..ec60968113b83 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -47,14 +47,13 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) return PTR_ERR(reg); } - if (plat_dat->phy_interface != PHY_INTERFACE_MODE_MII && - plat_dat->phy_interface != PHY_INTERFACE_MODE_RMII) { + ethmode = stmmac_get_phy_intf_sel(plat_dat->phy_interface); + if (ethmode != PHY_INTF_SEL_GMII_MII && + ethmode != PHY_INTF_SEL_RMII) { dev_err(&pdev->dev, "Only MII and RMII mode supported\n"); return -EINVAL; } - ethmode = stmmac_get_phy_intf_sel(plat_dat->phy_interface); - regmap_update_bits(reg, LPC18XX_CREG_CREG6, LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode); From 7fe0e06a7364ff7ffb3b44fe1c9340824b45c1a8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:32 +0000 Subject: [PATCH 674/867] net: stmmac: lpc18xx: use ->set_phy_intf_sel() Move the configuration of the dwmac PHY interface selection to the new ->set_phy_intf_sel() method. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5U-0000000DhQP-19Hd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index ec60968113b83..c68d7de1f8ac5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -23,12 +23,27 @@ #define LPC18XX_CREG_CREG6 0x12c # define LPC18XX_CREG_CREG6_ETHMODE_MASK GENMASK(2, 0) +static int lpc18xx_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) +{ + struct regmap *reg = bsp_priv; + + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RMII) + return -EINVAL; + + regmap_update_bits(reg, LPC18XX_CREG_CREG6, + LPC18XX_CREG_CREG6_ETHMODE_MASK, + FIELD_PREP(LPC18XX_CREG_CREG6_ETHMODE_MASK, + phy_intf_sel)); + + return 0; +} + static int lpc18xx_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - struct regmap *reg; - u8 ethmode; + struct regmap *regmap; int ret; ret = stmmac_get_platform_resources(pdev, &stmmac_res); @@ -41,21 +56,14 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) plat_dat->core_type = DWMAC_CORE_GMAC; - reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); - if (IS_ERR(reg)) { + regmap = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(regmap)) { dev_err(&pdev->dev, "syscon lookup failed\n"); - return PTR_ERR(reg); - } - - ethmode = stmmac_get_phy_intf_sel(plat_dat->phy_interface); - if (ethmode != PHY_INTF_SEL_GMII_MII && - ethmode != PHY_INTF_SEL_RMII) { - dev_err(&pdev->dev, "Only MII and RMII mode supported\n"); - return -EINVAL; + return PTR_ERR(regmap); } - regmap_update_bits(reg, LPC18XX_CREG_CREG6, - LPC18XX_CREG_CREG6_ETHMODE_MASK, 
ethmode); + plat_dat->bsp_priv = regmap; + plat_dat->set_phy_intf_sel = lpc18xx_set_phy_intf_sel; return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } From 9cd23c02ac57a4727095e1d1d62974e0bea445fb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:37 +0000 Subject: [PATCH 675/867] net: stmmac: sti: use PHY_INTF_SEL_x to select PHY interface Use the common dwmac definitions for the PHY interface selection field, adding MII_PHY_SEL_VAL() temporarily to avoid line wrapping. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5Z-0000000DhQV-1e2l@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 53d5ce1f6dc69..1e8769a81d77f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -77,13 +77,15 @@ * 001-RGMII * 010-SGMII * 100-RMII + * These are the DW MAC phy_intf_sel values */ #define MII_PHY_SEL_MASK GENMASK(4, 2) -#define ETH_PHY_SEL_RMII BIT(4) -#define ETH_PHY_SEL_SGMII BIT(3) -#define ETH_PHY_SEL_RGMII BIT(2) -#define ETH_PHY_SEL_GMII 0x0 -#define ETH_PHY_SEL_MII 0x0 +#define MII_PHY_SEL_VAL(val) FIELD_PREP_CONST(MII_PHY_SEL_MASK, val) +#define ETH_PHY_SEL_RMII MII_PHY_SEL_VAL(PHY_INTF_SEL_RMII) +#define ETH_PHY_SEL_SGMII MII_PHY_SEL_VAL(PHY_INTF_SEL_SGMII) +#define ETH_PHY_SEL_RGMII MII_PHY_SEL_VAL(PHY_INTF_SEL_RGMII) +#define ETH_PHY_SEL_GMII MII_PHY_SEL_VAL(PHY_INTF_SEL_GMII_MII) +#define ETH_PHY_SEL_MII MII_PHY_SEL_VAL(PHY_INTF_SEL_GMII_MII) struct sti_dwmac { phy_interface_t interface; /* MII interface */ From bd5a68159259efc910a5b199b3cd1a27d47205fb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:42 +0000 Subject: [PATCH 676/867] net: stmmac: sti: use PHY_INTF_SEL_x 
directly Use the PHY_INTF_SEL_x values directly rather than the driver private ETH_PHY_SEL_x values. Move the FIELD_PREP() into sti_dwmac_set_mode(). Use dwmac->interface directly. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5e-0000000DhQb-2B7I@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-sti.c | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 1e8769a81d77f..c97535824be0f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -81,11 +81,6 @@ */ #define MII_PHY_SEL_MASK GENMASK(4, 2) #define MII_PHY_SEL_VAL(val) FIELD_PREP_CONST(MII_PHY_SEL_MASK, val) -#define ETH_PHY_SEL_RMII MII_PHY_SEL_VAL(PHY_INTF_SEL_RMII) -#define ETH_PHY_SEL_SGMII MII_PHY_SEL_VAL(PHY_INTF_SEL_SGMII) -#define ETH_PHY_SEL_RGMII MII_PHY_SEL_VAL(PHY_INTF_SEL_RGMII) -#define ETH_PHY_SEL_GMII MII_PHY_SEL_VAL(PHY_INTF_SEL_GMII_MII) -#define ETH_PHY_SEL_MII MII_PHY_SEL_VAL(PHY_INTF_SEL_GMII_MII) struct sti_dwmac { phy_interface_t interface; /* MII interface */ @@ -104,13 +99,13 @@ struct sti_dwmac_of_data { void (*fix_retime_src)(void *priv, int speed, unsigned int mode); }; -static u32 phy_intf_sels[] = { - [PHY_INTERFACE_MODE_MII] = ETH_PHY_SEL_MII, - [PHY_INTERFACE_MODE_GMII] = ETH_PHY_SEL_GMII, - [PHY_INTERFACE_MODE_RGMII] = ETH_PHY_SEL_RGMII, - [PHY_INTERFACE_MODE_RGMII_ID] = ETH_PHY_SEL_RGMII, - [PHY_INTERFACE_MODE_SGMII] = ETH_PHY_SEL_SGMII, - [PHY_INTERFACE_MODE_RMII] = ETH_PHY_SEL_RMII, +static u8 phy_intf_sels[] = { + [PHY_INTERFACE_MODE_MII] = PHY_INTF_SEL_GMII_MII, + [PHY_INTERFACE_MODE_GMII] = PHY_INTF_SEL_GMII_MII, + [PHY_INTERFACE_MODE_RGMII] = PHY_INTF_SEL_RGMII, + [PHY_INTERFACE_MODE_RGMII_ID] = PHY_INTF_SEL_RGMII, + [PHY_INTERFACE_MODE_SGMII] = PHY_INTF_SEL_SGMII, + [PHY_INTERFACE_MODE_RMII] = 
PHY_INTF_SEL_RMII, }; enum { @@ -164,16 +159,18 @@ static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode) static int sti_dwmac_set_mode(struct sti_dwmac *dwmac) { struct regmap *regmap = dwmac->regmap; - int iface = dwmac->interface; u32 reg = dwmac->ctrl_reg; + u8 phy_intf_sel; u32 val; if (dwmac->gmac_en) regmap_update_bits(regmap, reg, EN_MASK, EN); - regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]); + phy_intf_sel = phy_intf_sels[dwmac->interface]; + regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, + FIELD_PREP(MII_PHY_SEL_MASK, phy_intf_sel)); - val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; + val = (dwmac->interface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; regmap_update_bits(regmap, reg, ENMII_MASK, val); dwmac->fix_retime_src(dwmac, dwmac->speed, 0); From ef5e870be90ffc390ae9afc0b50d411d3538126c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:47 +0000 Subject: [PATCH 677/867] net: stmmac: sti: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result and use that to set the control register to select the operating mode for the DWMAC core. Note that when an unsupported interface mode is used, the array would decode this to PHY_INTF_SEL_GMII_MII, so preserve this behaviour. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5j-0000000DhQh-2e0x@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-sti.c | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index c97535824be0f..593e154b59576 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -77,10 +77,9 @@ * 001-RGMII * 010-SGMII * 100-RMII - * These are the DW MAC phy_intf_sel values + * These are the DW MAC phy_intf_sel values. */ #define MII_PHY_SEL_MASK GENMASK(4, 2) -#define MII_PHY_SEL_VAL(val) FIELD_PREP_CONST(MII_PHY_SEL_MASK, val) struct sti_dwmac { phy_interface_t interface; /* MII interface */ @@ -99,15 +98,6 @@ struct sti_dwmac_of_data { void (*fix_retime_src)(void *priv, int speed, unsigned int mode); }; -static u8 phy_intf_sels[] = { - [PHY_INTERFACE_MODE_MII] = PHY_INTF_SEL_GMII_MII, - [PHY_INTERFACE_MODE_GMII] = PHY_INTF_SEL_GMII_MII, - [PHY_INTERFACE_MODE_RGMII] = PHY_INTF_SEL_RGMII, - [PHY_INTERFACE_MODE_RGMII_ID] = PHY_INTF_SEL_RGMII, - [PHY_INTERFACE_MODE_SGMII] = PHY_INTF_SEL_SGMII, - [PHY_INTERFACE_MODE_RMII] = PHY_INTF_SEL_RMII, -}; - enum { TX_RETIME_SRC_NA = 0, TX_RETIME_SRC_TXCLK = 1, @@ -160,13 +150,19 @@ static int sti_dwmac_set_mode(struct sti_dwmac *dwmac) { struct regmap *regmap = dwmac->regmap; u32 reg = dwmac->ctrl_reg; - u8 phy_intf_sel; + int phy_intf_sel; u32 val; if (dwmac->gmac_en) regmap_update_bits(regmap, reg, EN_MASK, EN); - phy_intf_sel = phy_intf_sels[dwmac->interface]; + phy_intf_sel = stmmac_get_phy_intf_sel(dwmac->interface); + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_SGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) + phy_intf_sel = PHY_INTF_SEL_GMII_MII; + regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, 
FIELD_PREP(MII_PHY_SEL_MASK, phy_intf_sel)); From e3c8f25cf2aa3f2547da26e7a46424c016550d31 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 6 Nov 2025 11:23:52 +0000 Subject: [PATCH 678/867] net: stmmac: sti: use ->set_phy_intf_sel() Rather than placing the phy_intf_sel() setup in the ->init() method, move it to the new ->set_phy_intf_sel() method. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vGy5o-0000000DhQn-34JE@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-sti.c | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 593e154b59576..b0509ab6b31ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -146,17 +146,18 @@ static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode) stih4xx_tx_retime_val[src]); } -static int sti_dwmac_set_mode(struct sti_dwmac *dwmac) +static int sti_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) { - struct regmap *regmap = dwmac->regmap; - u32 reg = dwmac->ctrl_reg; - int phy_intf_sel; - u32 val; + struct sti_dwmac *dwmac = bsp_priv; + struct regmap *regmap; + u32 reg, val; + + regmap = dwmac->regmap; + reg = dwmac->ctrl_reg; if (dwmac->gmac_en) regmap_update_bits(regmap, reg, EN_MASK, EN); - phy_intf_sel = stmmac_get_phy_intf_sel(dwmac->interface); if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && phy_intf_sel != PHY_INTF_SEL_RGMII && phy_intf_sel != PHY_INTF_SEL_SGMII && @@ -231,17 +232,8 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, static int sti_dwmac_init(struct platform_device *pdev, void *bsp_priv) { struct sti_dwmac *dwmac = bsp_priv; - int ret; - - ret = clk_prepare_enable(dwmac->clk); - if (ret) - return ret; - - ret = sti_dwmac_set_mode(dwmac); - if (ret) - clk_disable_unprepare(dwmac->clk); - return ret; + 
return clk_prepare_enable(dwmac->clk); } static void sti_dwmac_exit(struct platform_device *pdev, void *bsp_priv) @@ -286,6 +278,7 @@ static int sti_dwmac_probe(struct platform_device *pdev) dwmac->fix_retime_src = data->fix_retime_src; plat_dat->bsp_priv = dwmac; + plat_dat->set_phy_intf_sel = sti_set_phy_intf_sel; plat_dat->fix_mac_speed = data->fix_retime_src; plat_dat->init = sti_dwmac_init; plat_dat->exit = sti_dwmac_exit; From 8989d328dfe7c7a3b9f4b9f0ef60006d277f81cc Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:38 +0100 Subject: [PATCH 679/867] net: Helper to move packet data and metadata after skb_push/pull Lay groundwork for fixing BPF helpers available to TC(X) programs. When skb_push() or skb_pull() is called in a TC(X) ingress BPF program, the skb metadata must be kept in front of the MAC header. Otherwise, BPF programs using the __sk_buff->data_meta pseudo-pointer lose access to it. Introduce a helper that moves both metadata and a specified number of packet data bytes together, suitable as a drop-in replacement for memmove(). Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-1-5ceb08a9b37b@cloudflare.com --- include/linux/skbuff.h | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a7cc3d1f4fd11..ff90281ddf90e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4564,6 +4564,81 @@ static inline void skb_metadata_clear(struct sk_buff *skb) skb_metadata_set(skb, 0); } +/** + * skb_data_move - Move packet data and metadata after skb_push() or skb_pull(). + * @skb: packet to operate on + * @len: number of bytes pushed or pulled from &sk_buff->data + * @n: number of bytes to memmove() from pre-push/pull &sk_buff->data + * + * Moves @n bytes of packet data, can be zero, and all bytes of skb metadata. 
+ * + * Assumes metadata is located immediately before &sk_buff->data prior to the + * push/pull, and that sufficient headroom exists to hold it after an + * skb_push(). Otherwise, metadata is cleared and a one-time warning is issued. + * + * Prefer skb_postpull_data_move() or skb_postpush_data_move() to calling this + * helper directly. + */ +static inline void skb_data_move(struct sk_buff *skb, const int len, + const unsigned int n) +{ + const u8 meta_len = skb_metadata_len(skb); + u8 *meta, *meta_end; + + if (!len || (!n && !meta_len)) + return; + + if (!meta_len) + goto no_metadata; + + meta_end = skb_metadata_end(skb); + meta = meta_end - meta_len; + + if (WARN_ON_ONCE(meta_end + len != skb->data || + meta_len > skb_headroom(skb))) { + skb_metadata_clear(skb); + goto no_metadata; + } + + memmove(meta + len, meta, meta_len + n); + return; + +no_metadata: + memmove(skb->data, skb->data - len, n); +} + +/** + * skb_postpull_data_move - Move packet data and metadata after skb_pull(). + * @skb: packet to operate on + * @len: number of bytes pulled from &sk_buff->data + * @n: number of bytes to memmove() from pre-pull &sk_buff->data + * + * See skb_data_move() for details. + */ +static inline void skb_postpull_data_move(struct sk_buff *skb, + const unsigned int len, + const unsigned int n) +{ + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_data_move(skb, len, n); +} + +/** + * skb_postpush_data_move - Move packet data and metadata after skb_push(). + * @skb: packet to operate on + * @len: number of bytes pushed onto &sk_buff->data + * @n: number of bytes to memmove() from pre-push &sk_buff->data + * + * See skb_data_move() for details. 
+ */ +static inline void skb_postpush_data_move(struct sk_buff *skb, + const unsigned int len, + const unsigned int n) +{ + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_data_move(skb, -len, n); +} + struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING From 290fc0be09e2f6e08754782616bb7e67f8191fa1 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:39 +0100 Subject: [PATCH 680/867] net: Preserve metadata on pskb_expand_head pskb_expand_head() copies headroom, including skb metadata, into the newly allocated head, but then clears the metadata. As a result, metadata is lost when BPF helpers trigger an skb head reallocation. Let the skb metadata remain in the newly created copy of head. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-2-5ceb08a9b37b@cloudflare.com --- net/core/skbuff.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7ac5f8aa1235a..d95658b738d19 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2234,6 +2234,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone); * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. + * + * Note: If you skb_push() the start of the buffer after reallocating the + * header, call skb_postpush_data_move() first to move the metadata out of + * the way before writing to &sk_buff->data. */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, @@ -2305,8 +2309,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); - skb_metadata_clear(skb); - /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). 
From f38499ff45f567c932d0911e6a30b8ca022b9b52 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:40 +0100 Subject: [PATCH 681/867] bpf: Unclone skb head on bpf_dynptr_write to skb metadata Currently bpf_dynptr_from_skb_meta() marks the dynptr as read-only when the skb is cloned, preventing writes to metadata. Remove this restriction and unclone the skb head on bpf_dynptr_write() to metadata, now that the metadata is preserved during uncloning. This makes metadata dynptr consistent with skb dynptr, allowing writes regardless of whether the skb is cloned. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-3-5ceb08a9b37b@cloudflare.com --- include/linux/filter.h | 9 +++++++++ kernel/bpf/helpers.c | 6 ++---- net/core/filter.c | 18 ++++++++++++------ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index e116de7edc587..a104b39942305 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1781,6 +1781,8 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, u64 flags); void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, @@ -1817,6 +1819,13 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi { } +static inline int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, + u64 flags) +{ + return -EOPNOTSUPP; +} + static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) { return ERR_PTR(-EOPNOTSUPP); diff --git 
a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index eb25e70e0bdc0..3e830fd31f5ff 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1842,10 +1842,8 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, return -EINVAL; return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); case BPF_DYNPTR_TYPE_SKB_META: - if (flags) - return -EINVAL; - memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len); - return 0; + return __bpf_skb_meta_store_bytes(dst->data, dst->offset + offset, src, + len, flags); default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c index 52721efba332f..673299fd3d589 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12102,6 +12102,18 @@ void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) return skb_metadata_end(skb) - skb_metadata_len(skb) + offset; } +int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, u64 flags) +{ + if (unlikely(flags)) + return -EINVAL; + if (unlikely(bpf_try_make_writable(skb, 0))) + return -EFAULT; + + memmove(bpf_skb_meta_pointer(skb, offset), from, len); + return 0; +} + __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, struct bpf_dynptr *ptr__uninit) @@ -12129,9 +12141,6 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to * &__sk_buff->data_meta. * - * If passed @skb_ is a clone which shares the data with the original, the - * dynptr will be read-only. This limitation may be lifted in the future. 
- * * Return: * * %0 - dynptr ready to use * * %-EINVAL - invalid flags, dynptr set to null @@ -12149,9 +12158,6 @@ __bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags, bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb)); - if (skb_cloned(skb)) - bpf_dynptr_set_rdonly(ptr); - return 0; } From b85be58e2f7cff47f7477ae61022644a198ee592 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:41 +0100 Subject: [PATCH 682/867] vlan: Make vlan_remove_tag return nothing All callers ignore the return value. Prepare to reorder memmove() after skb_pull() which is a common pattern. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-4-5ceb08a9b37b@cloudflare.com --- include/linux/if_vlan.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 15e01935d3fad..afa5cc61a0fa9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -731,10 +731,8 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, * * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. - * - * Returns: a new pointer to skb->data, or NULL on failure to pull. 
*/ -static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) +static inline void vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); @@ -742,7 +740,7 @@ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); vlan_set_encap_proto(skb, vhdr); - return __skb_pull(skb, VLAN_HLEN); + __skb_pull(skb, VLAN_HLEN); } /** From efd35c26239bed39193201e958d65e695231ccda Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:42 +0100 Subject: [PATCH 683/867] bpf: Make bpf_skb_vlan_pop helper metadata-safe Use the metadata-aware helper to move packet bytes after skb_pull(), ensuring metadata remains valid after calling the BPF helper. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-5-5ceb08a9b37b@cloudflare.com --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index afa5cc61a0fa9..4ecc2509b0d43 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -738,9 +738,9 @@ static inline void vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) *vlan_tci = ntohs(vhdr->h_vlan_TCI); - memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); vlan_set_encap_proto(skb, vhdr); __skb_pull(skb, VLAN_HLEN); + skb_postpull_data_move(skb, VLAN_HLEN, 2 * ETH_ALEN); } /** From 55ffc98b44d28e13a218306666d16f2c7236d0ae Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:43 +0100 Subject: [PATCH 684/867] bpf: Make bpf_skb_vlan_push helper metadata-safe Use the metadata-aware helper to move packet bytes after skb_push(), ensuring metadata remains valid after calling the BPF helper. Also, take care to reserve sufficient headroom for metadata to fit. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-6-5ceb08a9b37b@cloudflare.com --- include/linux/if_vlan.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4ecc2509b0d43..f7f34eb15e068 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -355,16 +355,17 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, unsigned int mac_len) { + const u8 meta_len = mac_len > ETH_TLEN ? skb_metadata_len(skb) : 0; struct vlan_ethhdr *veth; - if (skb_cow_head(skb, VLAN_HLEN) < 0) + if (skb_cow_head(skb, meta_len + VLAN_HLEN) < 0) return -ENOMEM; skb_push(skb, VLAN_HLEN); /* Move the mac header sans proto to the beginning of the new header. */ if (likely(mac_len > ETH_TLEN)) - memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); + skb_postpush_data_move(skb, VLAN_HLEN, mac_len - ETH_TLEN); if (skb_mac_header_was_set(skb)) skb->mac_header -= VLAN_HLEN; From be83105d38ab99737ac0488600efd59ddd57b74c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:44 +0100 Subject: [PATCH 685/867] bpf: Make bpf_skb_adjust_room metadata-safe bpf_skb_adjust_room() may push or pull bytes from skb->data. In both cases, skb metadata must be moved accordingly to stay accessible. Replace existing memmove() calls, which only move payload, with a helper that also handles metadata. Reserve enough space for metadata to fit after skb_push. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-7-5ceb08a9b37b@cloudflare.com --- net/core/filter.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 673299fd3d589..297f0ae9dc1fe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3253,11 +3253,11 @@ static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { - /* Caller already did skb_cow() with len as headroom, + /* Caller already did skb_cow() with meta_len+len as headroom, * so no need to do it here. */ skb_push(skb, len); - memmove(skb->data, skb->data + len, off); + skb_postpush_data_move(skb, len, off); memset(skb->data + off, 0, len); /* No skb_postpush_rcsum(skb, skb->data + off, len) @@ -3281,7 +3281,7 @@ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) old_data = skb->data; __skb_pull(skb, len); skb_postpull_rcsum(skb, old_data + off, len); - memmove(skb->data, old_data, off); + skb_postpull_data_move(skb, len, off); return 0; } @@ -3489,6 +3489,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT; bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; u16 mac_len = 0, inner_net = 0, inner_trans = 0; + const u8 meta_len = skb_metadata_len(skb); unsigned int gso_type = SKB_GSO_DODGY; int ret; @@ -3499,7 +3500,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, return -ENOTSUPP; } - ret = skb_cow_head(skb, len_diff); + ret = skb_cow_head(skb, meta_len + len_diff); if (unlikely(ret < 0)) return ret; From 8cfc172ce28e6559d4d2d1a96df77f0f2d6179d6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:45 +0100 Subject: [PATCH 686/867] bpf: Make bpf_skb_change_proto helper metadata-safe bpf_skb_change_proto reuses the same headroom operations as 
bpf_skb_adjust_room, already updated to handle metadata safely. The remaining step is to ensure that there is sufficient headroom to accommodate metadata on skb_push(). Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-8-5ceb08a9b37b@cloudflare.com --- net/core/filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 297f0ae9dc1fe..50775c01c4567 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3326,10 +3326,11 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_proto_4_to_6(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); + const u8 meta_len = skb_metadata_len(skb); u32 off = skb_mac_header_len(skb); int ret; - ret = skb_cow(skb, len_diff); + ret = skb_cow(skb, meta_len + len_diff); if (unlikely(ret < 0)) return ret; From fb206fc3129bc9d4749905d4870ba05dc89126d2 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:46 +0100 Subject: [PATCH 687/867] bpf: Make bpf_skb_change_head helper metadata-safe Although bpf_skb_change_head() doesn't move packet data after skb_push(), skb metadata still needs to be relocated. Use the dedicated helper to handle it. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-9-5ceb08a9b37b@cloudflare.com --- net/core/filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 50775c01c4567..4124becf86047 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3875,6 +3875,7 @@ static const struct bpf_func_proto sk_skb_change_tail_proto = { static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, u64 flags) { + const u8 meta_len = skb_metadata_len(skb); u32 max_len = BPF_SKB_MAX_LEN; u32 new_len = skb->len + head_room; int ret; @@ -3884,7 +3885,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, new_len < skb->len)) return -EINVAL; - ret = skb_cow(skb, head_room); + ret = skb_cow(skb, meta_len + head_room); if (likely(!ret)) { /* Idea for this helper is that we currently only * allow to expand on mac header. This means that @@ -3896,6 +3897,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, * for redirection into L2 device. */ __skb_push(skb, head_room); + skb_postpush_data_move(skb, head_room, 0); memset(skb->data, 0, head_room); skb_reset_mac_header(skb); skb_reset_mac_len(skb); From 967534e57c4439ba43b31f4af4cb85e84c86e6b7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:47 +0100 Subject: [PATCH 688/867] selftests/bpf: Verify skb metadata in BPF instead of userspace Move metadata verification into the BPF TC programs. Previously, userspace read metadata from a map and verified it once at test end. Now TC programs compare metadata directly using __builtin_memcmp() and set a test_pass flag. This enables verification at multiple points during test execution rather than a single final check. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-10-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 52 +++-------- .../selftests/bpf/progs/test_xdp_meta.c | 88 +++++++++---------- 2 files changed, 57 insertions(+), 83 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 178292d1251a1..93a1fbe6a4fdb 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -171,33 +171,6 @@ static int write_test_packet(int tap_fd) return 0; } -static void assert_test_result(const struct bpf_map *result_map) -{ - int err; - __u32 map_key = 0; - __u8 map_value[TEST_PAYLOAD_LEN]; - - err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), - &map_value, TEST_PAYLOAD_LEN, BPF_ANY); - if (!ASSERT_OK(err, "lookup test_result")) - return; - - ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN, - "test_result map contains test payload"); -} - -static bool clear_test_result(struct bpf_map *result_map) -{ - const __u8 v[sizeof(test_payload)] = {}; - const __u32 k = 0; - int err; - - err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); - ASSERT_OK(err, "update test_result"); - - return err == 0; -} - void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -270,11 +243,13 @@ void test_xdp_context_veth(void) if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) goto close; + skel->bss->test_pass = false; + ret = send_test_packet(tx_ifindex); if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel->maps.test_result); + ASSERT_TRUE(skel->bss->test_pass, "test_pass"); close: close_netns(nstoken); @@ -286,7 +261,7 @@ void test_xdp_context_veth(void) static void test_tuntap(struct bpf_program *xdp_prog, 
struct bpf_program *tc_prio_1_prog, struct bpf_program *tc_prio_2_prog, - struct bpf_map *result_map) + bool *test_pass) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -295,8 +270,7 @@ static void test_tuntap(struct bpf_program *xdp_prog, int tap_ifindex; int ret; - if (!clear_test_result(result_map)) - return; + *test_pass = false; ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) @@ -340,7 +314,7 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - assert_test_result(result_map); + ASSERT_TRUE(*test_pass, "test_pass"); close: if (tap_fd >= 0) @@ -431,37 +405,37 @@ void test_xdp_context_tuntap(void) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_read")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_write")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_write, skel->progs.ing_cls_dynptr_read, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice_rdwr")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_offset_wr, skel->progs.ing_cls_dynptr_offset_rd, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset_oob")) test_tuntap(skel->progs.ing_xdp, 
skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("clone_data_meta_empty_on_data_write")) test_tuntap_mirred(skel->progs.ing_xdp, skel->progs.clone_data_meta_empty_on_data_write, diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index d79cb74b571e7..11288b20f56c7 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -11,37 +11,36 @@ #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -/* Demonstrates how metadata can be passed from an XDP program to a TC program - * using bpf_xdp_adjust_meta. - * For the sake of testing the metadata support in drivers, the XDP program uses - * a fixed-size payload after the Ethernet header as metadata. The TC program - * copies the metadata it receives into a map so it can be checked from - * userspace. +/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta. + * + * The XDP program extracts a fixed-size payload following the Ethernet header + * and stores it as packet metadata to test the driver's metadata support. The + * TC program then verifies if the passed metadata is correct. 
*/ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __uint(value_size, META_SIZE); -} test_result SEC(".maps"); - bool test_pass; +static const __u8 meta_want[META_SIZE] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; + SEC("tc") int ing_cls(struct __sk_buff *ctx) { - __u8 *data, *data_meta; - __u32 key = 0; - - data_meta = ctx_ptr(ctx, data_meta); - data = ctx_ptr(ctx, data); + __u8 *meta_have = ctx_ptr(ctx, data_meta); + __u8 *data = ctx_ptr(ctx, data); - if (data_meta + META_SIZE > data) - return TC_ACT_SHOT; + if (meta_have + META_SIZE > data) + goto out; - bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY); + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -49,17 +48,17 @@ int ing_cls(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_read(struct __sk_buff *ctx) { + __u8 meta_have[META_SIZE]; struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -86,20 +85,18 @@ SEC("tc") int ing_cls_dynptr_slice(struct __sk_buff *ctx) { struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst, *src; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + __u8 *meta_have; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); - if (!src) - return TC_ACT_SHOT; + meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!meta_have) + goto out; - __builtin_memcpy(dst, 
src, META_SIZE); + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -129,14 +126,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) { - struct bpf_dynptr meta; const __u32 chunk_len = META_SIZE / 4; - const __u32 zero = 0; + __u8 meta_have[META_SIZE]; + struct bpf_dynptr meta; __u8 *dst, *src; - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + dst = meta_have; /* 1. Regular read */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -155,9 +150,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) /* 4. Read from a slice starting at an offset */ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); if (!src) - return TC_ACT_SHOT; + goto out; __builtin_memcpy(dst, src, chunk_len); + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + + test_pass = true; +out: return TC_ACT_SHOT; } From 9ef9ac15a527739135548b87053f4646099e4bd6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:48 +0100 Subject: [PATCH 689/867] selftests/bpf: Dump skb metadata on verification failure Add diagnostic output when metadata verification fails to help with troubleshooting test failures. Introduce a check_metadata() helper that prints both expected and received metadata to the BPF program's stderr stream on mismatch. The userspace test reads and dumps this stream on failure. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-11-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 24 +++++++++++++++--- .../selftests/bpf/progs/test_xdp_meta.c | 25 ++++++++++++++++--- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 93a1fbe6a4fdb..db3027564261e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -171,6 +171,21 @@ static int write_test_packet(int tap_fd) return 0; } +static void dump_err_stream(const struct bpf_program *prog) +{ + char buf[512]; + int ret; + + ret = 0; + do { + ret = bpf_prog_stream_read(bpf_program__fd(prog), + BPF_STREAM_STDERR, buf, sizeof(buf), + NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, stderr); + } while (ret > 0); +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -249,7 +264,8 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - ASSERT_TRUE(skel->bss->test_pass, "test_pass"); + if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass")) + dump_err_stream(tc_prog); close: close_netns(nstoken); @@ -314,7 +330,8 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - ASSERT_TRUE(*test_pass, "test_pass"); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prio_2_prog ? 
: tc_prio_1_prog); close: if (tap_fd >= 0) @@ -385,7 +402,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - ASSERT_TRUE(*test_pass, "test_pass"); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prog); close: if (tap_fd >= 0) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 11288b20f56c7..3b137c4eed6cb 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -27,6 +27,23 @@ static const __u8 meta_want[META_SIZE] = { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, }; +static bool check_metadata(const char *file, int line, __u8 *meta_have) +{ + if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) + return true; + + bpf_stream_printk(BPF_STREAM_STDERR, + "FAIL:%s:%d: metadata mismatch\n" + " have:\n %pI6\n %pI6\n" + " want:\n %pI6\n %pI6\n", + file, line, + &meta_have[0x00], &meta_have[0x10], + &meta_want[0x00], &meta_want[0x10]); + return false; +} + +#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have) + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -36,7 +53,7 @@ int ing_cls(struct __sk_buff *ctx) if (meta_have + META_SIZE > data) goto out; - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; @@ -54,7 +71,7 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx) bpf_dynptr_from_skb_meta(ctx, 0, &meta); bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; @@ -92,7 +109,7 @@ int ing_cls_dynptr_slice(struct __sk_buff *ctx) if (!meta_have) goto out; - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; @@ -153,7 +170,7 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) 
goto out; __builtin_memcpy(dst, src, chunk_len); - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; From 1e1357fde808a35c6069759298660134e5dab053 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:49 +0100 Subject: [PATCH 690/867] selftests/bpf: Expect unclone to preserve skb metadata Since pskb_expand_head() no longer clears metadata on unclone, update tests for cloned packets to expect metadata to remain intact. Also simplify the clone_meta_dynptr_survives_{data,meta}_slice_write tests. Creating an r/w dynptr slice is sufficient to trigger an unclone in the prologue, so remove the extraneous writes to the data/meta slice. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-12-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 24 ++-- .../selftests/bpf/progs/test_xdp_meta.c | 118 ++++++++++-------- 2 files changed, 79 insertions(+), 63 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index db3027564261e..a129c3057202b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -454,29 +454,29 @@ void test_xdp_context_tuntap(void) skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, &skel->bss->test_pass); - if (test__start_subtest("clone_data_meta_empty_on_data_write")) + if (test__start_subtest("clone_data_meta_survives_data_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_data_write, + skel->progs.clone_data_meta_survives_data_write, &skel->bss->test_pass); - if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + if (test__start_subtest("clone_data_meta_survives_meta_write")) test_tuntap_mirred(skel->progs.ing_xdp, -
skel->progs.clone_data_meta_empty_on_meta_write, + skel->progs.clone_data_meta_survives_meta_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_data_slice_write, + skel->progs.clone_meta_dynptr_survives_data_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_meta_slice_write, + skel->progs.clone_meta_dynptr_survives_meta_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, &skel->bss->test_pass); test_xdp_meta__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 3b137c4eed6cb..a70de55c6997f 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -321,12 +321,13 @@ int ing_xdp(struct xdp_md *ctx) } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _payload_ using packet pointers. Applies only to cloned skbs. 
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _payload_ using packet pointers. */ SEC("tc") -int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +int clone_data_meta_survives_data_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); if (eth + 1 > ctx_ptr(ctx, data_end)) @@ -335,8 +336,10 @@ int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect no metadata */ - if (ctx->data_meta != ctx->data) + if (meta_have + META_SIZE > eth) + goto out; + + if (!check_metadata(meta_have)) goto out; /* Packet write to trigger unclone in prologue */ @@ -348,14 +351,14 @@ int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _metadata_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _metadata_ using packet pointers. 
*/ SEC("tc") -int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); - __u8 *md = ctx_ptr(ctx, data_meta); if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; @@ -363,25 +366,29 @@ int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - if (md + 1 > ctx_ptr(ctx, data)) { - /* Expect no metadata */ - test_pass = true; - } else { - /* Metadata write to trigger unclone in prologue */ - *md = 42; - } + if (meta_have + META_SIZE > eth) + goto out; + + if (!check_metadata(meta_have)) + goto out; + + /* Metadata write to trigger unclone in prologue */ + *meta_have = 42; + + test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _payload_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates a r/w slice to packet _payload_. */ SEC("tc") -int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; struct ethhdr *eth; bpf_dynptr_from_skb(ctx, 0, &data); @@ -392,29 +399,26 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (!check_metadata(meta_have)) goto out; - /* Packet write to trigger unclone in prologue */ - eth->h_proto = 42; - test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _metadata_ using a dynptr slice. Applies only to cloned skbs. 
+ * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates an r/w slice to packet _metadata_. */ SEC("tc") -int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; const struct ethhdr *eth; - __u8 *md; + __u8 *meta_have; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); @@ -424,16 +428,13 @@ int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!meta_have) goto out; - /* Metadata write to trigger unclone in prologue */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); - if (md) - *md = 42; + if (!check_metadata(meta_have)) + goto out; test_pass = true; out: @@ -441,14 +442,17 @@ int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) } /* - * Check that skb_meta dynptr is read-only before prog writes to packet payload - * using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _payload_ using dynptr_write helper and metadata + * remains intact before and after the write. 
*/ SEC("tc") -int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); @@ -458,17 +462,20 @@ int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect read-only metadata before unclone */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) + goto out; + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; /* Helper write to payload will unclone the packet */ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); - /* Expect no metadata after unclone */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; test_pass = true; @@ -477,14 +484,17 @@ int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) } /* - * Check that skb_meta dynptr is read-only if prog writes to packet - * metadata using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _metadata_ using dynptr_write helper and + * metadata remains intact before and after the write. 
*/ SEC("tc") -int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); @@ -494,14 +504,20 @@ int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect read-only metadata */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) goto out; - /* Metadata write. Expect failure. */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) + goto out; + + /* Helper write to metadata will unclone the packet */ + bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0); + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; test_pass = true; From 354d020c29f72513dce4f3902890158d99b67b67 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:50 +0100 Subject: [PATCH 691/867] selftests/bpf: Cover skb metadata access after vlan push/pop helper Add a test to verify that skb metadata remains accessible after calling bpf_skb_vlan_push() and bpf_skb_vlan_pop(), which modify the packet headroom. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-13-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 6 +++ .../selftests/bpf/progs/test_xdp_meta.c | 43 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index a129c3057202b..97c8f876f673b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -478,6 +478,12 @@ void test_xdp_context_tuntap(void) test_tuntap_mirred(skel->progs.ing_xdp, skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, &skel->bss->test_pass); + /* Tests for BPF helpers which touch headroom */ + if (test__start_subtest("helper_skb_vlan_push_pop")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_vlan_push_pop, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index a70de55c6997f..04c7487bb350e 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -44,6 +44,16 @@ static bool check_metadata(const char *file, int line, __u8 *meta_have) #define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have) +static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb) +{ + __u8 *data_meta = ctx_ptr(skb, data_meta); + __u8 *data = ctx_ptr(skb, data); + + return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta); +} + +#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb) + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -525,4 +535,37 @@ int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) return 
TC_ACT_SHOT; } +SEC("tc") +int helper_skb_vlan_push_pop(struct __sk_buff *ctx) +{ + int err; + + /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only + * secondary tag push triggers an actual MAC header modification. + */ + err = bpf_skb_vlan_push(ctx, 0, 42); + if (err) + goto out; + err = bpf_skb_vlan_push(ctx, 0, 207); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; From 29960e635b01b148e6db5a84957f99423fd85464 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:51 +0100 Subject: [PATCH 692/867] selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room Add a test to verify that skb metadata remains accessible after calling bpf_skb_adjust_room(), which modifies the packet headroom and can trigger head reallocation. The helper expects an Ethernet frame carrying an IP packet so switch test packet identification by source MAC address since we can no longer rely on Ethernet proto being set to zero. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-14-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 25 ++++++-- .../selftests/bpf/progs/test_xdp_meta.c | 61 ++++++++++++++++--- 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 97c8f876f673b..a3b82cf2f9e9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -124,10 +124,10 @@ static int send_test_packet(int ifindex) int n, sock = -1; __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. - */ - struct ethhdr eth = { 0 }; + /* We use the Ethernet header only to identify the test packet */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + }; memcpy(packet, &eth, sizeof(eth)); memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); @@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd) __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int n; - /* The ethernet header doesn't need to be valid for this test */ - memset(packet, 0, sizeof(struct ethhdr)); + /* The Ethernet header is mostly not relevant. We use it to identify the + * test packet and some BPF helpers we exercise expect to operate on + * Ethernet frames carrying IP packets. Pretend that's the case.
+ */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + .h_proto = htons(ETH_P_IP), + }; + + memcpy(packet, &eth, sizeof(eth)); memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); n = write(tap_fd, packet, sizeof(packet)); @@ -484,6 +492,11 @@ void test_xdp_context_tuntap(void) skel->progs.helper_skb_vlan_push_pop, NULL, /* tc prio 2 */ &skel->bss->test_pass); + if (test__start_subtest("helper_skb_adjust_room")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_adjust_room, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 04c7487bb350e..6edc84d8dc52c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -20,6 +20,10 @@ bool test_pass; +static const __u8 smac_want[ETH_ALEN] = { + 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF, +}; + static const __u8 meta_want[META_SIZE] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, @@ -27,6 +31,11 @@ static const __u8 meta_want[META_SIZE] = { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, }; +static bool check_smac(const struct ethhdr *eth) +{ + return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN); +} + static bool check_metadata(const char *file, int line, __u8 *meta_have) { if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) @@ -281,7 +290,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx) /* Drop any non-test packets */ if (eth + 1 > ctx_ptr(ctx, data_end)) return XDP_DROP; - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -321,9 +330,9 @@ int ing_xdp(struct xdp_md *ctx) /* The Linux networking stack may send other packets on the test * interface that interfere with the test. Just drop them.
- * The test packets can be recognized by their ethertype of zero. + * The test packets can be recognized by their source MAC address. */ - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; __builtin_memcpy(data_meta, payload, META_SIZE); @@ -343,7 +352,7 @@ int clone_data_meta_survives_data_write(struct __sk_buff *ctx) if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; if (meta_have + META_SIZE > eth) @@ -373,7 +382,7 @@ int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; if (meta_have + META_SIZE > eth) @@ -406,7 +415,7 @@ int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -435,7 +444,7 @@ int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -469,7 +478,7 @@ int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; /* Expect read-write metadata before unclone */ @@ -511,7 +520,7 @@ int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; /* Expect read-write metadata before unclone */ @@ -568,4 +577,38 @@ int helper_skb_vlan_push_pop(struct __sk_buff *ctx) return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_adjust_room(struct __sk_buff *ctx) +{ + int err; + + /* Grow a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, 1, 
BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Shrink a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Grow a 256 byte hole to trigger head reallocation */ + err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; From 85d454afef612f880c69cb39a7d74772a24411d3 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:52 +0100 Subject: [PATCH 693/867] selftests/bpf: Cover skb metadata access after change_head/tail helper Add a test to verify that skb metadata remains accessible after calling bpf_skb_change_head() and bpf_skb_change_tail(), which modify packet headroom/tailroom and can trigger head reallocation. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-15-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 5 +++ .../selftests/bpf/progs/test_xdp_meta.c | 34 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index a3b82cf2f9e9b..65735a134abb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -497,6 +497,11 @@ void test_xdp_context_tuntap(void) skel->progs.helper_skb_adjust_room, NULL, /* tc prio 2 */ &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_head_tail")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_head_tail, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git 
a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 6edc84d8dc52c..e0b2e8ed0cc51 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -611,4 +611,38 @@ int helper_skb_adjust_room(struct __sk_buff *ctx) return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_change_head_tail(struct __sk_buff *ctx) +{ + int err; + + /* Reserve 1 extra in the front for packet data */ + err = bpf_skb_change_head(ctx, 1, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 256 extra bytes in the front to trigger head reallocation */ + err = bpf_skb_change_head(ctx, 256, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 4k extra bytes in the back to trigger head reallocation */ + err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; From d2c5cca3fb58f96732e1dfd19e36b43e4e54d214 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:53 +0100 Subject: [PATCH 694/867] selftests/bpf: Cover skb metadata access after bpf_skb_change_proto Add a test to verify that skb metadata remains accessible after calling bpf_skb_change_proto(), which modifies packet headroom to accommodate different IP header sizes. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-16-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 5 ++++ .../selftests/bpf/progs/test_xdp_meta.c | 25 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 65735a134abb8..ee94c281888ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -502,6 +502,11 @@ void test_xdp_context_tuntap(void) skel->progs.helper_skb_change_head_tail, NULL, /* tc prio 2 */ &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_proto")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_proto, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index e0b2e8ed0cc51..0a0f371a2decc 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -4,6 +4,7 @@ #include #include +#include #include #include "bpf_kfuncs.h" @@ -645,4 +646,28 @@ int helper_skb_change_head_tail(struct __sk_buff *ctx) return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_change_proto(struct __sk_buff *ctx) +{ + int err; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; From 07c428ece3222832bdbfcc4ffa8b8d3991c5eb39 Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Fri, 7 Nov 2025 11:56:30 +0800 Subject: [PATCH 695/867] bpf: Export necessary symbols for modules with struct_ops Export three symbols that are needed to implement struct_ops in a tristate subsystem. bpf_struct_ops_get() and bpf_struct_ops_put() are used conditionally by the inline functions bpf_try_module_get() and bpf_module_put() to hold or release a reference on a struct_ops. bpf_obj_name_cpy() copies an object name from one buffer to another with the appropriate checks. Signed-off-by: D. Wythe Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251107035632.115950-2-alibuda@linux.alibaba.com --- kernel/bpf/bpf_struct_ops.c | 2 ++ kernel/bpf/syscall.c | 1 + 2 files changed, 3 insertions(+) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index a41e6730edcf3..278490683d288 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -1162,6 +1162,7 @@ bool bpf_struct_ops_get(const void *kdata) map = __bpf_map_inc_not_zero(&st_map->map, false); return !IS_ERR(map); } +EXPORT_SYMBOL_GPL(bpf_struct_ops_get); void bpf_struct_ops_put(const void *kdata) { @@ -1173,6 +1174,7 @@ void bpf_struct_ops_put(const void *kdata) bpf_map_put(&st_map->map); } +EXPORT_SYMBOL_GPL(bpf_struct_ops_put); u32 bpf_struct_ops_id(const void *kdata) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8a129746bd6cc..80b86e9d3c39b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1234,6 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) return src - orig_src; } +EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); int map_check_no_btf(const struct bpf_map *map, const struct btf *btf, From 15f295f55656658e65bdbc9b901d6b2e49d68d72 Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Fri, 7 Nov 2025 11:56:31 +0800 Subject: [PATCH 696/867] net/smc: bpf: Introduce generic hook for handshake flow The introduction of IPPROTO_SMC enables eBPF programs to determine whether to use SMC based on the context of socket creation, such as network namespaces, PID and comm name, etc. As a subsequent enhancement, introduce a new generic hook that allows deciding at runtime whether to use SMC or not, based on criteria including but not limited to local/remote IP addresses or ports. Users can now write their own implementation via bpf_struct_ops to choose whether to use SMC or not before the TCP 3rd handshake is completed. Signed-off-by: D. Wythe Signed-off-by: Martin KaFai Lau Reviewed-by: Dust Li Link: https://patch.msgid.link/20251107035632.115950-3-alibuda@linux.alibaba.com --- include/net/netns/smc.h | 3 + include/net/smc.h | 53 +++++++++++++++ net/ipv4/tcp_output.c | 31 +++++---- net/smc/Kconfig | 10 +++ net/smc/Makefile | 1 + net/smc/af_smc.c | 9 +++ net/smc/smc_hs_bpf.c | 140 ++++++++++++++++++++++++++++++++++++++++ net/smc/smc_hs_bpf.h | 31 +++++++++ net/smc/smc_sysctl.c | 91 ++++++++++++++++++++++++++ 9 files changed, 355 insertions(+), 14 deletions(-) create mode 100644 net/smc/smc_hs_bpf.c create mode 100644 net/smc/smc_hs_bpf.h diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 6ceb12baec241..ed24c9f638eea 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -17,6 +17,9 @@ struct netns_smc { #ifdef CONFIG_SYSCTL struct ctl_table_header *smc_hdr; #endif +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl __rcu *hs_ctrl; +#endif /* CONFIG_SMC_HS_CTRL_BPF */ unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; int sysctl_smcr_testlink_time; diff --git a/include/net/smc.h b/include/net/smc.h index 08bee529ed8d4..bfdc4c41f0198 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -17,6 +17,8 @@ #include #include +struct tcp_sock; +struct inet_request_sock; struct sock; #define 
SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ @@ -50,4 +52,55 @@ struct smcd_dev { u8 going_away : 1; }; +#define SMC_HS_CTRL_NAME_MAX 16 + +enum { + /* ops can be inherit from init_net */ + SMC_HS_CTRL_FLAG_INHERITABLE = 0x1, + + SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE, +}; + +struct smc_hs_ctrl { + /* private */ + + struct list_head list; + struct module *owner; + + /* public */ + + /* unique name */ + char name[SMC_HS_CTRL_NAME_MAX]; + int flags; + + /* Invoked before computing SMC option for SYN packets. + * We can control whether to set SMC options by returning various value. + * Return 0 to disable SMC, or return any other value to enable it. + */ + int (*syn_option)(struct tcp_sock *tp); + + /* Invoked before Set up SMC options for SYN-ACK packets + * We can control whether to respond SMC options by returning various + * value. Return 0 to disable SMC, or return any other value to enable + * it. + */ + int (*synack_option)(const struct tcp_sock *tp, + struct inet_request_sock *ireq); +}; + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +#define smc_call_hsbpf(init_val, tp, func, ...) ({ \ + typeof(init_val) __ret = (init_val); \ + struct smc_hs_ctrl *ctrl; \ + rcu_read_lock(); \ + ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \ + if (ctrl && ctrl->func) \ + __ret = ctrl->func(tp, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + __ret; \ +}) +#else +#define smc_call_hsbpf(init_val, tp, ...) 
({ (void)(tp); (init_val); }) +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + #endif /* _SMC_H */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f5df7a71f629..479afb714bdf9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, mptcp_options_write(th, ptr, tp, opts); } -static void smc_set_option(const struct tcp_sock *tp, +static void smc_set_option(struct tcp_sock *tp, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) - if (static_branch_unlikely(&tcp_have_smc)) { - if (tp->syn_smc) { - if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { - opts->options |= OPTION_SMC; - *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; - } + if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) { + tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option); + /* re-check syn_smc */ + if (tp->syn_smc && + *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } #endif } static void smc_set_option_cond(const struct tcp_sock *tp, - const struct inet_request_sock *ireq, + struct inet_request_sock *ireq, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) - if (static_branch_unlikely(&tcp_have_smc)) { - if (tp->syn_smc && ireq->smc_ok) { - if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { - opts->options |= OPTION_SMC; - *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; - } + if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) { + ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq); + /* re-check smc_ok */ + if (ireq->smc_ok && + *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } #endif diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 99ecd59d1f4b8..325addf83cc69 100644 --- 
a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -19,3 +19,13 @@ config SMC_DIAG smcss. if unsure, say Y. + +config SMC_HS_CTRL_BPF + bool "Generic eBPF hook for SMC handshake flow" + depends on SMC && BPF_SYSCALL + default y + help + SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC + handshake flow, which offer much greater flexibility in modifying the behavior + of the SMC protocol stack compared to a complete kernel-based approach. Select + this option if you want filtring the handshake process via eBPF programs. \ No newline at end of file diff --git a/net/smc/Makefile b/net/smc/Makefile index 0e754cbc38f9c..5368634c5dd6d 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o smc-y += smc_tracepoint.o smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o +smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0ef3e16a8517a..e388de8dca097 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -58,6 +58,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_inet.h" +#include "smc_hs_bpf.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3600,8 +3601,16 @@ static int __init smc_init(void) pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); goto out_ulp; } + rc = bpf_smc_hs_ctrl_init(); + if (rc) { + pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__, + rc); + goto out_inet; + } static_branch_enable(&tcp_have_smc); return 0; +out_inet: + smc_inet_exit(); out_ulp: tcp_unregister_ulp(&smc_ulp_ops); out_ib: diff --git a/net/smc/smc_hs_bpf.c b/net/smc/smc_hs_bpf.c new file mode 100644 index 0000000000000..063d23d858508 --- /dev/null +++ b/net/smc/smc_hs_bpf.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA 
(SMC-R) and RoCE + * + * Generic hook for SMC handshake flow. + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2025, Alibaba Inc. + * + * Author: D. Wythe + */ + +#include +#include +#include +#include + +#include "smc_hs_bpf.h" + +static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock); +static LIST_HEAD(smc_hs_ctrl_list); + +static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl) +{ + int ret = 0; + + spin_lock(&smc_hs_ctrl_list_lock); + /* already exist or duplicate name */ + if (smc_hs_ctrl_find_by_name(ctrl->name)) + ret = -EEXIST; + else + list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list); + spin_unlock(&smc_hs_ctrl_list_lock); + return ret; +} + +static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl) +{ + spin_lock(&smc_hs_ctrl_list_lock); + list_del_rcu(&ctrl->list); + spin_unlock(&smc_hs_ctrl_list_lock); + + /* Ensure that all readers to complete */ + synchronize_rcu(); +} + +struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name) +{ + struct smc_hs_ctrl *ctrl; + + list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) { + if (strcmp(ctrl->name, name) == 0) + return ctrl; + } + return NULL; +} + +static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; } +static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return 1; +} + +static struct smc_hs_ctrl __smc_bpf_hs_ctrl = { + .syn_option = __smc_bpf_stub_set_tcp_option, + .synack_option = __smc_bpf_stub_set_tcp_option_cond, +}; + +static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; } + +static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link) +{ + if (link) + return -EOPNOTSUPP; + + return smc_hs_ctrl_reg(kdata); +} + +static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link) +{ + smc_hs_ctrl_unreg(kdata); +} + +static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct smc_hs_ctrl *u_ctrl; + struct smc_hs_ctrl 
*k_ctrl; + u32 moff; + + u_ctrl = (const struct smc_hs_ctrl *)udata; + k_ctrl = (struct smc_hs_ctrl *)kdata; + + moff = __btf_member_bit_offset(t, member) / 8; + switch (moff) { + case offsetof(struct smc_hs_ctrl, name): + if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name, + sizeof(u_ctrl->name)) <= 0) + return -EINVAL; + return 1; + case offsetof(struct smc_hs_ctrl, flags): + if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS) + return -EINVAL; + k_ctrl->flags = u_ctrl->flags; + return 1; + default: + break; + } + + return 0; +} + +static const struct bpf_func_proto * +bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return bpf_base_func_proto(func_id, prog); +} + +static const struct bpf_verifier_ops smc_bpf_verifier_ops = { + .get_func_proto = bpf_smc_hs_func_proto, + .is_valid_access = bpf_tracing_btf_ctx_access, +}; + +static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = { + .name = "smc_hs_ctrl", + .init = smc_bpf_hs_ctrl_init, + .reg = smc_bpf_hs_ctrl_reg, + .unreg = smc_bpf_hs_ctrl_unreg, + .cfi_stubs = &__smc_bpf_hs_ctrl, + .verifier_ops = &smc_bpf_verifier_ops, + .init_member = smc_bpf_hs_ctrl_init_member, + .owner = THIS_MODULE, +}; + +int bpf_smc_hs_ctrl_init(void) +{ + return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl); +} diff --git a/net/smc/smc_hs_bpf.h b/net/smc/smc_hs_bpf.h new file mode 100644 index 0000000000000..f5f1807c079e8 --- /dev/null +++ b/net/smc/smc_hs_bpf.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Generic hook for SMC handshake flow. + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2025, Alibaba Inc. + * + * Author: D. Wythe + */ + +#ifndef __SMC_HS_CTRL +#define __SMC_HS_CTRL + +#include + +/* Find hs_ctrl by the target name, which required to be a c-string. + * Return NULL if no such ctrl was found,otherwise, return a valid ctrl. + * + * Note: Caller MUST ensure it's was invoked under rcu_read_lock. 
+ */ +struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name); + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +int bpf_smc_hs_ctrl_init(void); +#else +static inline int bpf_smc_hs_ctrl_init(void) { return 0; } +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + +#endif /* __SMC_HS_CTRL */ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 7b2471904d049..b1efed5462435 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -12,12 +12,14 @@ #include #include +#include #include #include "smc.h" #include "smc_core.h" #include "smc_llc.h" #include "smc_sysctl.h" +#include "smc_hs_bpf.h" static int min_sndbuf = SMC_BUF_MIN_SIZE; static int min_rcvbuf = SMC_BUF_MIN_SIZE; @@ -32,6 +34,69 @@ static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static unsigned int smcr_max_wr_min = 2; static unsigned int smcr_max_wr_max = 2048; +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name) +{ + struct smc_hs_ctrl *ctrl = NULL; + + rcu_read_lock(); + /* null or empty name ask to clear current ctrl */ + if (name && name[0]) { + ctrl = smc_hs_ctrl_find_by_name(name); + if (!ctrl) { + rcu_read_unlock(); + return -EINVAL; + } + /* no change, just return */ + if (ctrl == rcu_dereference(net->smc.hs_ctrl)) { + rcu_read_unlock(); + return 0; + } + if (!bpf_try_module_get(ctrl, ctrl->owner)) { + rcu_read_unlock(); + return -EBUSY; + } + } + /* xhcg old ctrl with the new one atomically */ + ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl))); + /* release old ctrl */ + if (ctrl) + bpf_module_put(ctrl, ctrl->owner); + + rcu_read_unlock(); + return 0; +} + +static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl); + char val[SMC_HS_CTRL_NAME_MAX]; + const struct ctl_table tbl = { + .data = val, + .maxlen = SMC_HS_CTRL_NAME_MAX, + }; + struct smc_hs_ctrl *ctrl; + int ret; + + 
rcu_read_lock(); + ctrl = rcu_dereference(net->smc.hs_ctrl); + if (ctrl) + memcpy(val, ctrl->name, sizeof(ctrl->name)); + else + val[0] = '\0'; + rcu_read_unlock(); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (write) + ret = smc_net_replace_smc_hs_ctrl(net, val); + return ret; +} +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -119,6 +184,15 @@ static struct ctl_table smc_table[] = { .extra1 = &smcr_max_wr_min, .extra2 = &smcr_max_wr_max, }, +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + { + .procname = "hs_ctrl", + .data = &init_net.smc.hs_ctrl, + .mode = 0644, + .maxlen = SMC_HS_CTRL_NAME_MAX, + .proc_handler = proc_smc_hs_ctrl, + }, +#endif /* CONFIG_SMC_HS_CTRL_BPF */ }; int __net_init smc_sysctl_net_init(struct net *net) @@ -129,6 +203,16 @@ int __net_init smc_sysctl_net_init(struct net *net) table = smc_table; if (!net_eq(net, &init_net)) { int i; +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl *ctrl; + + rcu_read_lock(); + ctrl = rcu_dereference(init_net.smc.hs_ctrl); + if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE && + bpf_try_module_get(ctrl, ctrl->owner)) + rcu_assign_pointer(net->smc.hs_ctrl, ctrl); + rcu_read_unlock(); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); if (!table) @@ -161,6 +245,9 @@ int __net_init smc_sysctl_net_init(struct net *net) if (!net_eq(net, &init_net)) kfree(table); err_alloc: +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + smc_net_replace_smc_hs_ctrl(net, NULL); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ return -ENOMEM; } @@ -170,6 +257,10 @@ void __net_exit smc_sysctl_net_exit(struct net *net) table = net->smc.smc_hdr->ctl_table_arg; unregister_net_sysctl_table(net->smc.smc_hdr); +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + smc_net_replace_smc_hs_ctrl(net, NULL); +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + if (!net_eq(net, &init_net)) kfree(table); } From 
beb3c67297d92f9428484410cf79135d38d0aff3 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 7 Nov 2025 11:56:32 +0800 Subject: [PATCH 697/867] bpf/selftests: Add selftest for bpf_smc_hs_ctrl This test introduces a tiny smc_hs_ctrl for filtering SMC connections based on IP pairs, and also adds a realistic topology model to verify it. Also, since only SMC loopback can be used in the CI test, an additional configuration needs to be enabled. Follow the steps below to run this test. make -C tools/testing/selftests/bpf cd tools/testing/selftests/bpf sudo ./test_progs -t smc The results show: Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: D. Wythe Signed-off-by: Martin KaFai Lau Tested-by: Saket Kumar Bhaskar Reviewed-by: Zhu Yanjun Link: https://patch.msgid.link/20251107035632.115950-4-alibuda@linux.alibaba.com --- tools/testing/selftests/bpf/config | 5 + .../selftests/bpf/prog_tests/test_bpf_smc.c | 390 ++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_smc.c | 117 ++++++ 3 files changed, 512 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_smc.c diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 70b28c1e653ea..fcd2f9bf78c99 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -123,3 +123,8 @@ CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y +CONFIG_INFINIBAND=y +CONFIG_SMC=y +CONFIG_SMC_HS_CTRL_BPF=y +CONFIG_DIBS=y +CONFIG_DIBS_LO=y \ No newline at end of file diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c new file mode 100644 index 0000000000000..de22734abc4d2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "network_helpers.h" +#include "bpf_smc.skel.h" + 
+#ifndef IPPROTO_SMC +#define IPPROTO_SMC 256 +#endif + +#define CLIENT_IP "127.0.0.1" +#define SERVER_IP "127.0.1.0" +#define SERVER_IP_VIA_RISK_PATH "127.0.2.0" + +#define SERVICE_1 80 +#define SERVICE_2 443 +#define SERVICE_3 8443 + +#define TEST_NS "bpf_smc_netns" + +static struct netns_obj *test_netns; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +#if defined(__s390x__) +/* s390x has default seid */ +static bool setup_ueid(void) { return true; } +static void cleanup_ueid(void) {} +#else +enum { + SMC_NETLINK_ADD_UEID = 10, + SMC_NETLINK_REMOVE_UEID +}; + +enum { + SMC_NLA_EID_TABLE_UNSPEC, + SMC_NLA_EID_TABLE_ENTRY, /* string */ +}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[1024]; +}; + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK" +#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID" + +static uint16_t smc_nl_family_id = -1; + +static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg = {0}; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = nlmsg_flags; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 1; + na = (struct nlattr *)GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + 
while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static bool get_smc_nl_family_id(void) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlattr *nl; + int fd, ret; + pid_t pid; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "nl_family socket")) + return false; + + pid = getpid(); + + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "nl_family bind")) + goto fail; + + ret = send_cmd(fd, GENL_ID_CTRL, pid, + NLM_F_REQUEST, CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME, + strlen(SMC_GENL_FAMILY_NAME)); + if (!ASSERT_OK(ret, "nl_family query")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 || + !NLMSG_OK(&msg.n, ret), "nl_family response")) + goto fail; + + nl = (struct nlattr *)GENLMSG_DATA(&msg); + nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len)); + if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type")) + goto fail; + + smc_nl_family_id = *(uint16_t *)NLA_DATA(nl); + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool smc_ueid(int op) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlmsgerr *err; + char test_ueid[32]; + int fd, ret; + pid_t pid; + + /* UEID required */ + memset(test_ueid, '\x20', sizeof(test_ueid)); + memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID)); + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "ueid socket")) + return false; + + pid = getpid(); + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, 
sizeof(nl_src)); + if (!ASSERT_OK(ret, "ueid bind")) + goto fail; + + ret = send_cmd(fd, smc_nl_family_id, pid, + NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY, + (void *)test_ueid, sizeof(test_ueid)); + if (!ASSERT_OK(ret, "ueid cmd")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(ret < 0 || + !NLMSG_OK(&msg.n, ret), "ueid response")) + goto fail; + + if (msg.n.nlmsg_type == NLMSG_ERROR) { + err = NLMSG_DATA(&msg); + switch (op) { + case SMC_NETLINK_REMOVE_UEID: + if (!ASSERT_FALSE((err->error && err->error != -ENOENT), + "ueid remove")) + goto fail; + break; + case SMC_NETLINK_ADD_UEID: + if (!ASSERT_OK(err->error, "ueid add")) + goto fail; + break; + default: + break; + } + } + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool setup_ueid(void) +{ + /* get smc nl id */ + if (!get_smc_nl_family_id()) + return false; + /* clear old ueid for bpftest */ + smc_ueid(SMC_NETLINK_REMOVE_UEID); + /* smc-loopback required ueid */ + return smc_ueid(SMC_NETLINK_ADD_UEID); +} + +static void cleanup_ueid(void) +{ + smc_ueid(SMC_NETLINK_REMOVE_UEID); +} +#endif /* __s390x__ */ + +static bool setup_netns(void) +{ + test_netns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(test_netns, "open net namespace")) + goto fail_netns; + + SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo"); + SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo"); + + return true; +fail_ip: + netns_free(test_netns); +fail_netns: + return false; +} + +static void cleanup_netns(void) +{ + netns_free(test_netns); +} + +static bool setup_smc(void) +{ + if (!setup_ueid()) + return false; + + if (!setup_netns()) + goto fail_netns; + + return true; +fail_netns: + cleanup_ueid(); + return false; +} + +static int set_client_addr_cb(int fd, void *opts) +{ + const char *src = (const char *)opts; + struct sockaddr_in localaddr; + + localaddr.sin_family = AF_INET; + localaddr.sin_port = htons(0); + localaddr.sin_addr.s_addr = inet_addr(src); + return 
!ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind"); +} + +static void run_link(const char *src, const char *dst, int port) +{ + struct network_helper_opts opts = {0}; + int server, client; + + server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL); + if (!ASSERT_OK_FD(server, "start service_1")) + return; + + opts.proto = IPPROTO_TCP; + opts.post_socket_cb = set_client_addr_cb; + opts.cb_opts = (void *)src; + + client = connect_to_fd_opts(server, &opts); + if (!ASSERT_OK_FD(client, "start connect")) + goto fail_client; + + close(client); +fail_client: + close(server); +} + +static void block_link(int map_fd, const char *src, const char *dst) +{ + struct smc_policy_ip_value val = { .mode = /* block */ 0 }; + struct smc_policy_ip_key key = { + .sip = inet_addr(src), + .dip = inet_addr(dst), + }; + + bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); +} + +/* + * This test describes a real-life service topology as follows: + * + * +-------------> service_1 + * link 1 | | + * +--------------------> server | link 2 + * | | V + * | +-------------> service_2 + * | link 3 + * client -------------------> server_via_unsafe_path -> service_3 + * + * Among them, + * 1. link-1 is very suitable for using SMC. + * 2. link-2 is not suitable for using SMC, because the mode of this link is + * kind of short-link services. + * 3. link-3 is also not suitable for using SMC, because the RDMA link is + * unavailable and needs to go through a long timeout before it can fallback + * to TCP. + * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl. 
+ */ +static void test_topo(void) +{ + struct bpf_smc *skel; + int rc, map_fd; + + skel = bpf_smc__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load")) + return; + + rc = bpf_smc__attach(skel); + if (!ASSERT_OK(rc, "bpf_smc__attach")) + goto fail; + + map_fd = bpf_map__fd(skel->maps.smc_policy_ip); + if (!ASSERT_OK_FD(map_fd, "bpf_map__fd")) + goto fail; + + /* Mock the process of transparent replacement, since we will modify + * protocol to ipproto_smc accropding to it via + * fmod_ret/update_socket_protocol. + */ + write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck"); + + /* Configure ip strat */ + block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH); + block_link(map_fd, SERVER_IP, SERVER_IP); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_1); + /* should go with smc fallback */ + run_link(SERVER_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc fallback */ + run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3); + + ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count"); + +fail: + bpf_smc__destroy(skel); +} + +void test_bpf_smc(void) +{ + if (!setup_smc()) { + printf("setup for smc test failed, test SKIP:\n"); + test__skip(); + return; + } + + if (test__start_subtest("topo")) + test_topo(); + + cleanup_ueid(); + cleanup_netns(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c new file mode 100644 index 0000000000000..70d8b08f59140 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_smc.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include +#include +#include 
"bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +enum { + BPF_SMC_LISTEN = 10, +}; + +struct smc_sock___local { + struct sock sk; + struct smc_sock *listen_smc; + bool use_fallback; +} __attribute__((preserve_access_index)); + +int smc_cnt = 0; +int fallback_cnt = 0; + +SEC("fentry/smc_release") +int BPF_PROG(bpf_smc_release, struct socket *sock) +{ + /* only count from one side (client) */ + if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN) + return 0; + smc_cnt++; + return 0; +} + +SEC("fentry/smc_switch_to_fallback") +int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc) +{ + /* only count from one side (client) */ + if (smc && !smc->listen_smc) + fallback_cnt++; + return 0; +} + +/* go with default value if no strat was found */ +bool default_ip_strat_value = true; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct smc_policy_ip_key)); + __uint(value_size, sizeof(struct smc_policy_ip_value)); + __uint(max_entries, 128); + __uint(map_flags, BPF_F_NO_PREALLOC); +} smc_policy_ip SEC(".maps"); + +static bool smc_check(__u32 src, __u32 dst) +{ + struct smc_policy_ip_value *value; + struct smc_policy_ip_key key = { + .sip = src, + .dip = dst, + }; + + value = bpf_map_lookup_elem(&smc_policy_ip, &key); + return value ? 
value->mode : default_ip_strat_value; +} + +SEC("fmod_ret/update_socket_protocol") +int BPF_PROG(smc_run, int family, int type, int protocol) +{ + struct task_struct *task; + + if (family != AF_INET && family != AF_INET6) + return protocol; + + if ((type & 0xf) != SOCK_STREAM) + return protocol; + + if (protocol != 0 && protocol != IPPROTO_TCP) + return protocol; + + task = bpf_get_current_task_btf(); + /* Prevent from affecting other tests */ + if (!task || !task->nsproxy->net_ns->smc.hs_ctrl) + return protocol; + + return IPPROTO_SMC; +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return smc_check(ireq->req.__req_common.skc_daddr, + ireq->req.__req_common.skc_rcv_saddr); +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp) +{ + return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr, + tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr); +} + +SEC(".struct_ops") +struct smc_hs_ctrl linkcheck = { + .name = "linkcheck", + .syn_option = (void *)bpf_smc_set_tcp_option, + .synack_option = (void *)bpf_smc_set_tcp_option_cond, +}; From 1534ff77757e44bcc4b98d0196bc5c0052fce5fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 11:10:54 +0000 Subject: [PATCH 698/867] sctp: prevent possible shift-out-of-bounds in sctp_transport_update_rto syzbot reported a possible shift-out-of-bounds [1] Blamed commit added rto_alpha_max and rto_beta_max set to 1000. It is unclear if some sctp users are setting very large rto_alpha and/or rto_beta. In order to prevent user regression, perform the test at run time. Also add READ_ONCE() annotations as sysctl values can change under us. 
[1] UBSAN: shift-out-of-bounds in net/sctp/transport.c:509:41 shift exponent 64 is too large for 32-bit type 'unsigned int' CPU: 0 UID: 0 PID: 16704 Comm: syz.2.2320 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:233 [inline] __ubsan_handle_shift_out_of_bounds+0x27f/0x420 lib/ubsan.c:494 sctp_transport_update_rto.cold+0x1c/0x34b net/sctp/transport.c:509 sctp_check_transmitted+0x11c4/0x1c30 net/sctp/outqueue.c:1502 sctp_outq_sack+0x4ef/0x1b20 net/sctp/outqueue.c:1338 sctp_cmd_process_sack net/sctp/sm_sideeffect.c:840 [inline] sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1372 [inline] Fixes: b58537a1f562 ("net: sctp: fix permissions for rto_alpha and rto_beta knobs") Reported-by: syzbot+f8c46c8b2b7f6e076e99@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/690c81ae.050a0220.3d0d33.014e.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Acked-by: Xin Long Link: https://patch.msgid.link/20251106111054.3288127-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sctp/transport.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 0d48c61fe6ade..0c56d9673cc13 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -486,6 +486,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) if (tp->rttvar || tp->srtt) { struct net *net = tp->asoc->base.net; + unsigned int rto_beta, rto_alpha; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -497,10 +498,14 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be 
expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) - + (rtt >> net->sctp.rto_alpha); + rto_beta = READ_ONCE(net->sctp.rto_beta); + if (rto_beta < 32) + tp->rttvar = tp->rttvar - (tp->rttvar >> rto_beta) + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> rto_beta); + rto_alpha = READ_ONCE(net->sctp.rto_alpha); + if (rto_alpha < 32) + tp->srtt = tp->srtt - (tp->srtt >> rto_alpha) + + (rtt >> rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. From 73edb26b06ea0eb5bd8c6cae5b2df212ae3c7ab5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 22:34:06 +0000 Subject: [PATCH 699/867] sctp: Don't inherit do_auto_asconf in sctp_clone_sock(). syzbot reported list_del(&sp->auto_asconf_list) corruption in sctp_destroy_sock(). The repro calls setsockopt(SCTP_AUTO_ASCONF, 1) to a SCTP listener, calls accept(), and close()s the child socket. setsockopt(SCTP_AUTO_ASCONF, 1) sets sp->do_auto_asconf to 1 and links sp->auto_asconf_list to a per-netns list. Both fields are placed after sp->pd_lobby in struct sctp_sock, and sctp_copy_descendant() did not copy the fields before the cited commit. Also, sctp_clone_sock() did not set them explicitly. In addition, sctp_auto_asconf_init() is called from sctp_sock_migrate(), but it initialises the fields only conditionally. The two fields relied on __GFP_ZERO added in sk_alloc(), but sk_clone() does not use it. Let's clear newsp->do_auto_asconf in sctp_clone_sock(). [0]: list_del corruption. prev->next should be ffff8880799e9148, but was ffff8880799e8808. (prev=ffff88803347d9f8) kernel BUG at lib/list_debug.c:64! 
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 6008 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 RIP: 0010:__list_del_entry_valid_or_report+0x15a/0x190 lib/list_debug.c:62 Code: e8 7b 26 71 fd 43 80 3c 2c 00 74 08 4c 89 ff e8 7c ee 92 fd 49 8b 17 48 c7 c7 80 0a bf 8b 48 89 de 4c 89 f9 e8 07 c6 94 fc 90 <0f> 0b 4c 89 f7 e8 4c 26 71 fd 43 80 3c 2c 00 74 08 4c 89 ff e8 4d RSP: 0018:ffffc90003067ad8 EFLAGS: 00010246 RAX: 000000000000006d RBX: ffff8880799e9148 RCX: b056988859ee6e00 RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffc90003067807 R09: 1ffff9200060cf00 R10: dffffc0000000000 R11: fffff5200060cf01 R12: 1ffff1100668fb3f R13: dffffc0000000000 R14: ffff88803347d9f8 R15: ffff88803347d9f8 FS: 00005555823e5500(0000) GS:ffff88812613e000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000200000000480 CR3: 00000000741ce000 CR4: 00000000003526f0 Call Trace: __list_del_entry_valid include/linux/list.h:132 [inline] __list_del_entry include/linux/list.h:223 [inline] list_del include/linux/list.h:237 [inline] sctp_destroy_sock+0xb4/0x370 net/sctp/socket.c:5163 sk_common_release+0x75/0x310 net/core/sock.c:3961 sctp_close+0x77e/0x900 net/sctp/socket.c:1550 inet_release+0x144/0x190 net/ipv4/af_inet.c:437 __sock_release net/socket.c:662 [inline] sock_close+0xc3/0x240 net/socket.c:1455 __fput+0x44c/0xa70 fs/file_table.c:468 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xe9/0x130 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x2bd/0xfa0 arch/x86/entry/syscall_64.c:100 
entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 16942cf4d3e3 ("sctp: Use sk_clone() in sctp_accept().") Reported-by: syzbot+ba535cb417f106327741@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/690d2185.a70a0220.22f260.000e.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251106223418.1455510-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sctp/structs.h | 4 ---- net/sctp/socket.c | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5900196d65fd1..affee44bd38e3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -228,10 +228,6 @@ struct sctp_sock { atomic_t pd_mode; - /* Fields after this point will be skipped on copies, like on accept - * and peeloff operations - */ - /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 38d2932acebfc..d808096f5ab17 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4885,6 +4885,7 @@ static struct sock *sctp_clone_sock(struct sock *sk, } #endif + newsp->do_auto_asconf = 0; skb_queue_head_init(&newsp->pd_lobby); newsp->ep = sctp_endpoint_new(newsk, GFP_KERNEL); From 5b9192c2c075edf8a35f8c6c2b5ef36cdc8ce9f9 Mon Sep 17 00:00:00 2001 From: Zhongqiu Han Date: Fri, 7 Nov 2025 15:45:33 +0800 Subject: [PATCH 700/867] ptp: ocp: Document sysfs output format for backward compatibility Add a comment to ptp_ocp_tty_show() explaining that the sysfs output intentionally does not include a trailing newline. This is required for backward compatibility with existing userspace software that reads the sysfs attribute and uses the value directly as a device path. A previous attempt to add a newline to align with common kernel conventions broke userspace applications that were opening device paths like "/dev/ttyS4\n" instead of "/dev/ttyS4", resulting in ENOENT errors. 
This comment prevents future attempts to "fix" this behavior, which would break existing userspace applications. Link: https://lore.kernel.org/netdev/20251030124519.1828058-1-zhongqiu.han@oss.qualcomm.com/ Link: https://lore.kernel.org/netdev/aef3b850-5f38-4c28-a018-3b0006dc2f08@linux.dev/ Suggested-by: Jakub Kicinski Signed-off-by: Zhongqiu Han Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251107074533.416048-1-zhongqiu.han@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index a5c3632529862..eeebe4d149f71 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3430,6 +3430,12 @@ ptp_ocp_tty_show(struct device *dev, struct device_attribute *attr, char *buf) struct dev_ext_attribute *ea = to_ext_attr(attr); struct ptp_ocp *bp = dev_get_drvdata(dev); + /* + * NOTE: This output does not include a trailing newline for backward + * compatibility. Existing userspace software uses this value directly + * as a device path (e.g., "/dev/ttyS4"), and adding a newline would + * break those applications. Do not add a newline to this output. + */ return sysfs_emit(buf, "ttyS%d", bp->port[(uintptr_t)ea->var].line); } From 38f073a71e85c726b09f935e7886de72bc57b15b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Fri, 7 Nov 2025 21:01:00 +0100 Subject: [PATCH 701/867] net: ravb: Correct bad check of timestamp control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting the Renesas network drivers to use flags from enum hwtstamp_rx_filters to control when to timestamp packages instead of a driver specific schema with bit-wise flags an error was made. 
The bit-wise driver specific flags correct logic to set get_ts was: q: RAVB_BE + tstamp_rx_ctrl: 0 => 0 q: RAVB_NC + tstamp_rx_ctrl: 0 => 0 q: RAVB_BE + tstamp_rx_ctrl: RAVB_RXTSTAMP_TYPE_V2_L2_EVENT => 0 q: RAVB_NC + tstamp_rx_ctrl: RAVB_RXTSTAMP_TYPE_V2_L2_EVENT => 1 q: RAVB_BE + tstamp_rx_ctrl: RAVB_RXTSTAMP_TYPE_ALL => 1 q: RAVB_NC + tstamp_rx_ctrl: RAVB_RXTSTAMP_TYPE_ALL => 1 The converted logic to use enum flags mapped tstamp_rx_ctrl as 0 to HWTSTAMP_FILTER_NONE RAVB_RXTSTAMP_TYPE_V2_L2_EVENT to HWTSTAMP_FILTER_PTP_V2_L2_EVENT RAVB_RXTSTAMP_TYPE_ALL to HWTSTAMP_FILTER_ALL But the logic was incorrectly changed to: q: RAVB_BE + tstamp_rx_ctrl: HWTSTAMP_FILTER_NONE => 1 (error) q: RAVB_NC + tstamp_rx_ctrl: HWTSTAMP_FILTER_NONE => 0 q: RAVB_BE + tstamp_rx_ctrl: HWTSTAMP_FILTER_PTP_V2_L2_EVENT => 0 q: RAVB_NC + tstamp_rx_ctrl: HWTSTAMP_FILTER_PTP_V2_L2_EVENT => 1 q: RAVB_BE + tstamp_rx_ctrl: HWTSTAMP_FILTER_ALL => 1 q: RAVB_NC + tstamp_rx_ctrl: HWTSTAMP_FILTER_ALL => 0 (error) This change restores the converted flag check to the correct logic of the bit-wise driver specific flags. 
Reported-by: Simon Horman Closes: https://lore.kernel.org/linux-renesas-soc/aQ4xSv9629XF-Bt3@horms.kernel.org/ Fixes: 16e2e6cf75e6 ("net: ravb: Use common defines for time stamping control") Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20251107200100.3637869-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1680e94b92425..57b0db314fb5e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -955,9 +955,9 @@ static void ravb_rx_rcar_hwstamp(struct ravb_private *priv, int q, bool get_ts; if (q == RAVB_NC) - get_ts = priv->tstamp_rx_ctrl == HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + get_ts = priv->tstamp_rx_ctrl != HWTSTAMP_FILTER_NONE; else - get_ts = priv->tstamp_rx_ctrl != HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + get_ts = priv->tstamp_rx_ctrl == HWTSTAMP_FILTER_ALL; if (!get_ts) return; From e781122d76f018ad17752ab1018b3ffbf7fad84e Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 6 Nov 2025 17:56:20 -0300 Subject: [PATCH 702/867] net/sched: Abort __tc_modify_qdisc if parent is a clsact/ingress qdisc Wang reported an illegal configuration [1] where the user attempts to add a child qdisc to the ingress qdisc as follows: tc qdisc add dev eth0 handle ffff:0 ingress tc qdisc add dev eth0 handle ffe0:0 parent ffff:a fq To solve this, we reject any configuration attempt to add a child qdisc to ingress or clsact. 
[1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Fixes: 5e50da01d0ce ("[NET_SCHED]: Fix endless loops (part 2): "simple" qdiscs") Reported-by: Wang Liang Closes: https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Reviewed-by: Pedro Tammela Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Reviewed-by: Cong Wang Link: https://patch.msgid.link/20251106205621.3307639-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1e058b46d3e17..f56b18c8aebf7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1599,6 +1599,11 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } + if (p->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot add children to ingress/clsact qdisc"); + return -EOPNOTSUPP; + } q = qdisc_leaf(p, clid, extack); if (IS_ERR(q)) return PTR_ERR(q); From 60260ad935861f6b8db7c65c23faa41c98d8fb15 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 6 Nov 2025 17:56:21 -0300 Subject: [PATCH 703/867] selftests/tc-testing: Create tests trying to add children to clsact/ingress qdiscs In response to Wang's bug report [1], add the following test cases: - Try and fail to add an fq child to an ingress qdisc - Try and fail to add an fq child to a clsact qdisc [1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Reviewed-by: Pedro Tammela Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Reviewed-by: Cong Wang Link: https://patch.msgid.link/20251106205621.3307639-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json 
b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 998e5a2f45796..0091bcd91c2cc 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -961,5 +961,49 @@ "teardown": [ "$TC qdisc del dev $DUMMY root" ] + }, + { + "id": "4989", + "name": "Try to add an fq child to an ingress qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 ingress" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress" + ] + }, + { + "id": "c2b0", + "name": "Try to add an fq child to a clsact qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 clsact" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] } ] From 762e7e174da91cf4babfe77e45bc6b67334b1503 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 9 Nov 2025 14:46:35 +0100 Subject: [PATCH 704/867] net: dsa: tag_brcm: do not mark link local traffic as offloaded Broadcom switches locally terminate link local traffic and do not forward it, so we should not mark it as offloaded. In some situations we still want/need to flood this traffic, e.g. if STP is disabled, or it is explicitly enabled via the group_fwd_mask. But if the skb is marked as offloaded, the kernel will assume this was already done in hardware, and the packets never reach other bridge ports. 
So ensure that link local traffic is never marked as offloaded, so that the kernel can forward/flood these packets in software if needed. Since the local termination is not configurable, check the destination MAC, and never mark packets as offloaded if it is a link local ether address. While modern switches set the tag reason code to BRCM_EG_RC_PROT_TERM for trapped link local traffic, they also set it for link local traffic that is flooded (01:80:c2:00:00:10 to 01:80:c2:00:00:2f), so we cannot use it and need to look at the destination address for them as well. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Fixes: 0e62f543bed0 ("net: dsa: Fix duplicate frames flooded by learning") Signed-off-by: Jonas Gorski Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251109134635.243951-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index d9c77fa553b53..eadb358179ce3 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -176,7 +176,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); return skb; } @@ -250,7 +251,8 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, len); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); dsa_strip_etype_header(skb, len); From a6e4fd38bf2f2e2363b61c27f4e6c49b14e4bb07 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:42 +0100 Subject: [PATCH 705/867] net: dsa: b53: b53_arl_read{,25}(): use the entry for comparision Align
the b53_arl_read{,25}() functions by consistently using the parsed arl entry instead of parsing the raw registers again. Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c911d7ea601fe..1b94cf7b06e89 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1850,7 +1850,7 @@ static int b53_arl_rw_op(struct b53_device *dev, unsigned int op) return b53_arl_op_wait(dev); } -static int b53_arl_read(struct b53_device *dev, u64 mac, +static int b53_arl_read(struct b53_device *dev, const u8 *mac, u16 vid, struct b53_arl_entry *ent, u8 *idx) { DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); @@ -1874,14 +1874,13 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, B53_ARLTBL_DATA_ENTRY(i), &fwd_entry); b53_arl_to_entry(ent, mac_vid, fwd_entry); - if (!(fwd_entry & ARLTBL_VALID)) { + if (!ent->is_valid) { set_bit(i, free_bins); continue; } - if ((mac_vid & ARLTBL_MAC_MASK) != mac) + if (!ether_addr_equal(ent->mac, mac)) continue; - if (dev->vlan_enabled && - ((mac_vid >> ARLTBL_VID_S) & ARLTBL_VID_MASK) != vid) + if (dev->vlan_enabled && ent->vid != vid) continue; *idx = i; return 0; @@ -1891,7 +1890,7 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, return *idx >= dev->num_arl_bins ? 
-ENOSPC : -ENOENT; } -static int b53_arl_read_25(struct b53_device *dev, u64 mac, +static int b53_arl_read_25(struct b53_device *dev, const u8 *mac, u16 vid, struct b53_arl_entry *ent, u8 *idx) { DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); @@ -1913,14 +1912,13 @@ static int b53_arl_read_25(struct b53_device *dev, u64 mac, b53_arl_to_entry_25(ent, mac_vid); - if (!(mac_vid & ARLTBL_VALID_25)) { + if (!ent->is_valid) { set_bit(i, free_bins); continue; } - if ((mac_vid & ARLTBL_MAC_MASK) != mac) + if (!ether_addr_equal(ent->mac, mac)) continue; - if (dev->vlan_enabled && - ((mac_vid >> ARLTBL_VID_S_65) & ARLTBL_VID_MASK_25) != vid) + if (dev->vlan_enabled && ent->vid != vid) continue; *idx = i; return 0; @@ -1953,9 +1951,9 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, return ret; if (is5325(dev) || is5365(dev)) - ret = b53_arl_read_25(dev, mac, vid, &ent, &idx); + ret = b53_arl_read_25(dev, addr, vid, &ent, &idx); else - ret = b53_arl_read(dev, mac, vid, &ent, &idx); + ret = b53_arl_read(dev, addr, vid, &ent, &idx); /* If this is a read, just finish now */ if (op) From 4a291fe7226736a465ddb3fa93c21fcef7162ec7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:43 +0100 Subject: [PATCH 706/867] net: dsa: b53: move reading ARL entries into their own function Instead of duplicating the whole code iterating over all bins for BCM5325, factor out reading and parsing the entry into its own functions, and name it the modern one after the first chip with that ARL format, (BCM53)95. 
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 69 +++++++++++--------------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1b94cf7b06e89..d99e15a7a6bb9 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1850,48 +1850,30 @@ static int b53_arl_rw_op(struct b53_device *dev, unsigned int op) return b53_arl_op_wait(dev); } -static int b53_arl_read(struct b53_device *dev, const u8 *mac, - u16 vid, struct b53_arl_entry *ent, u8 *idx) +static void b53_arl_read_entry_25(struct b53_device *dev, + struct b53_arl_entry *ent, u8 idx) { - DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); - unsigned int i; - int ret; - - ret = b53_arl_op_wait(dev); - if (ret) - return ret; - - bitmap_zero(free_bins, dev->num_arl_bins); - - /* Read the bins */ - for (i = 0; i < dev->num_arl_bins; i++) { - u64 mac_vid; - u32 fwd_entry; + u64 mac_vid; - b53_read64(dev, B53_ARLIO_PAGE, - B53_ARLTBL_MAC_VID_ENTRY(i), &mac_vid); - b53_read32(dev, B53_ARLIO_PAGE, - B53_ARLTBL_DATA_ENTRY(i), &fwd_entry); - b53_arl_to_entry(ent, mac_vid, fwd_entry); + b53_read64(dev, B53_ARLIO_PAGE, B53_ARLTBL_MAC_VID_ENTRY(idx), + &mac_vid); + b53_arl_to_entry_25(ent, mac_vid); +} - if (!ent->is_valid) { - set_bit(i, free_bins); - continue; - } - if (!ether_addr_equal(ent->mac, mac)) - continue; - if (dev->vlan_enabled && ent->vid != vid) - continue; - *idx = i; - return 0; - } +static void b53_arl_read_entry_95(struct b53_device *dev, + struct b53_arl_entry *ent, u8 idx) +{ + u32 fwd_entry; + u64 mac_vid; - *idx = find_first_bit(free_bins, dev->num_arl_bins); - return *idx >= dev->num_arl_bins ? 
-ENOSPC : -ENOENT; + b53_read64(dev, B53_ARLIO_PAGE, B53_ARLTBL_MAC_VID_ENTRY(idx), + &mac_vid); + b53_read32(dev, B53_ARLIO_PAGE, B53_ARLTBL_DATA_ENTRY(idx), &fwd_entry); + b53_arl_to_entry(ent, mac_vid, fwd_entry); } -static int b53_arl_read_25(struct b53_device *dev, const u8 *mac, - u16 vid, struct b53_arl_entry *ent, u8 *idx) +static int b53_arl_read(struct b53_device *dev, const u8 *mac, + u16 vid, struct b53_arl_entry *ent, u8 *idx) { DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); unsigned int i; @@ -1905,12 +1887,10 @@ static int b53_arl_read_25(struct b53_device *dev, const u8 *mac, /* Read the bins */ for (i = 0; i < dev->num_arl_bins; i++) { - u64 mac_vid; - - b53_read64(dev, B53_ARLIO_PAGE, - B53_ARLTBL_MAC_VID_ENTRY(i), &mac_vid); - - b53_arl_to_entry_25(ent, mac_vid); + if (is5325(dev) || is5365(dev)) + b53_arl_read_entry_25(dev, ent, i); + else + b53_arl_read_entry_95(dev, ent, i); if (!ent->is_valid) { set_bit(i, free_bins); @@ -1950,10 +1930,7 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, if (ret) return ret; - if (is5325(dev) || is5365(dev)) - ret = b53_arl_read_25(dev, addr, vid, &ent, &idx); - else - ret = b53_arl_read(dev, addr, vid, &ent, &idx); + ret = b53_arl_read(dev, addr, vid, &ent, &idx); /* If this is a read, just finish now */ if (op) From bf6e9d2ae1dbafee53ec4ccd126595172e1e5278 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:44 +0100 Subject: [PATCH 707/867] net: dsa: b53: move writing ARL entries into their own functions Move writing ARL entries into individual functions for each format. 
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 38 ++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d99e15a7a6bb9..9eb7ca878e301 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1860,6 +1860,16 @@ static void b53_arl_read_entry_25(struct b53_device *dev, b53_arl_to_entry_25(ent, mac_vid); } +static void b53_arl_write_entry_25(struct b53_device *dev, + const struct b53_arl_entry *ent, u8 idx) +{ + u64 mac_vid; + + b53_arl_from_entry_25(&mac_vid, ent); + b53_write64(dev, B53_ARLIO_PAGE, B53_ARLTBL_MAC_VID_ENTRY(idx), + mac_vid); +} + static void b53_arl_read_entry_95(struct b53_device *dev, struct b53_arl_entry *ent, u8 idx) { @@ -1872,6 +1882,19 @@ static void b53_arl_read_entry_95(struct b53_device *dev, b53_arl_to_entry(ent, mac_vid, fwd_entry); } +static void b53_arl_write_entry_95(struct b53_device *dev, + const struct b53_arl_entry *ent, u8 idx) +{ + u32 fwd_entry; + u64 mac_vid; + + b53_arl_from_entry(&mac_vid, &fwd_entry, ent); + b53_write64(dev, B53_ARLIO_PAGE, B53_ARLTBL_MAC_VID_ENTRY(idx), + mac_vid); + b53_write32(dev, B53_ARLIO_PAGE, B53_ARLTBL_DATA_ENTRY(idx), + fwd_entry); +} + static int b53_arl_read(struct b53_device *dev, const u8 *mac, u16 vid, struct b53_arl_entry *ent, u8 *idx) { @@ -1912,9 +1935,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, const unsigned char *addr, u16 vid, bool is_valid) { struct b53_arl_entry ent; - u32 fwd_entry; - u64 mac, mac_vid = 0; u8 idx = 0; + u64 mac; int ret; /* Convert the array into a 64-bit MAC */ @@ -1947,7 +1969,6 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, /* We could not find a matching MAC, so reset to a new entry */ dev_dbg(dev->dev, "{%pM,%.4d} not 
found, using idx: %d\n", addr, vid, idx); - fwd_entry = 0; break; default: dev_dbg(dev->dev, "{%pM,%.4d} found, using idx: %d\n", @@ -1975,16 +1996,9 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, ent.is_age = false; memcpy(ent.mac, addr, ETH_ALEN); if (is5325(dev) || is5365(dev)) - b53_arl_from_entry_25(&mac_vid, &ent); + b53_arl_write_entry_25(dev, &ent, idx); else - b53_arl_from_entry(&mac_vid, &fwd_entry, &ent); - - b53_write64(dev, B53_ARLIO_PAGE, - B53_ARLTBL_MAC_VID_ENTRY(idx), mac_vid); - - if (!is5325(dev) && !is5365(dev)) - b53_write32(dev, B53_ARLIO_PAGE, - B53_ARLTBL_DATA_ENTRY(idx), fwd_entry); + b53_arl_write_entry_95(dev, &ent, idx); return b53_arl_rw_op(dev, 0); } From 1716be6db04af53bac9b869f01156a460595cf41 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:45 +0100 Subject: [PATCH 708/867] net: dsa: b53: provide accessors for accessing ARL_SRCH_CTL In order to more easily support more formats, move accessing ARL_SRCH_CTL into helper functions to contain the differences. 
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-5-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 37 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9eb7ca878e301..b13437ea21a09 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2033,18 +2033,37 @@ int b53_fdb_del(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_fdb_del); -static int b53_arl_search_wait(struct b53_device *dev) +static void b53_read_arl_srch_ctl(struct b53_device *dev, u8 *val) { - unsigned int timeout = 1000; - u8 reg, offset; + u8 offset; + + if (is5325(dev) || is5365(dev)) + offset = B53_ARL_SRCH_CTL_25; + else + offset = B53_ARL_SRCH_CTL; + + b53_read8(dev, B53_ARLIO_PAGE, offset, val); +} + +static void b53_write_arl_srch_ctl(struct b53_device *dev, u8 val) +{ + u8 offset; if (is5325(dev) || is5365(dev)) offset = B53_ARL_SRCH_CTL_25; else offset = B53_ARL_SRCH_CTL; + b53_write8(dev, B53_ARLIO_PAGE, offset, val); +} + +static int b53_arl_search_wait(struct b53_device *dev) +{ + unsigned int timeout = 1000; + u8 reg; + do { - b53_read8(dev, B53_ARLIO_PAGE, offset, &reg); + b53_read_arl_srch_ctl(dev, &reg); if (!(reg & ARL_SRCH_STDN)) return -ENOENT; @@ -2099,23 +2118,15 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, unsigned int count = 0, results_per_hit = 1; struct b53_device *priv = ds->priv; struct b53_arl_entry results[2]; - u8 offset; int ret; - u8 reg; if (priv->num_arl_bins > 2) results_per_hit = 2; mutex_lock(&priv->arl_mutex); - if (is5325(priv) || is5365(priv)) - offset = B53_ARL_SRCH_CTL_25; - else - offset = B53_ARL_SRCH_CTL; - /* Start search operation */ - reg = ARL_SRCH_STDN; - b53_write8(priv, B53_ARLIO_PAGE, offset, reg); + b53_write_arl_srch_ctl(priv, ARL_SRCH_STDN); do { ret = b53_arl_search_wait(priv); From
e0c476f325a8c9b961a3d446c24d3c8ecae7d186 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:46 +0100 Subject: [PATCH 709/867] net: dsa: b53: split reading search entry into their own functions Split reading search entries into a function for each format. Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-6-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 56 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b13437ea21a09..fa4cf6ceddb8c 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2076,28 +2076,48 @@ static int b53_arl_search_wait(struct b53_device *dev) return -ETIMEDOUT; } -static void b53_arl_search_rd(struct b53_device *dev, u8 idx, - struct b53_arl_entry *ent) +static void b53_arl_search_read_25(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent) { u64 mac_vid; - if (is5325(dev)) { - b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL_0_MACVID_25, - &mac_vid); - b53_arl_to_entry_25(ent, mac_vid); - } else if (is5365(dev)) { - b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL_0_MACVID_65, - &mac_vid); - b53_arl_to_entry_25(ent, mac_vid); - } else { - u32 fwd_entry; + b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL_0_MACVID_25, + &mac_vid); + b53_arl_to_entry_25(ent, mac_vid); +} - b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL_MACVID(idx), - &mac_vid); - b53_read32(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL(idx), - &fwd_entry); - b53_arl_to_entry(ent, mac_vid, fwd_entry); - } +static void b53_arl_search_read_65(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent) +{ + u64 mac_vid; + + b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL_0_MACVID_65, + &mac_vid); + b53_arl_to_entry_25(ent, mac_vid); +} + +static void b53_arl_search_read_95(struct b53_device *dev, u8 
idx, + struct b53_arl_entry *ent) +{ + u32 fwd_entry; + u64 mac_vid; + + b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL_MACVID(idx), + &mac_vid); + b53_read32(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSTL(idx), + &fwd_entry); + b53_arl_to_entry(ent, mac_vid, fwd_entry); +} + +static void b53_arl_search_rd(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent) +{ + if (is5325(dev)) + b53_arl_search_read_25(dev, idx, ent); + else if (is5365(dev)) + b53_arl_search_read_65(dev, idx, ent); + else + b53_arl_search_read_95(dev, idx, ent); } static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, From a7e73339ad46ade76d29fb6cc7d7854222608c26 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:47 +0100 Subject: [PATCH 710/867] net: dsa: b53: move ARL entry functions into ops struct Now that the differences in ARL entry formats are neatly contained into functions per chip family, wrap them into an ops struct and add wrapper functions to access them. Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-7-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 67 ++++++++++++++++++++++---------- drivers/net/dsa/b53/b53_priv.h | 30 ++++++++++++++ 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index fa4cf6ceddb8c..c69022cc85bf3 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1910,10 +1910,7 @@ static int b53_arl_read(struct b53_device *dev, const u8 *mac, /* Read the bins */ for (i = 0; i < dev->num_arl_bins; i++) { - if (is5325(dev) || is5365(dev)) - b53_arl_read_entry_25(dev, ent, i); - else - b53_arl_read_entry_95(dev, ent, i); + b53_arl_read_entry(dev, ent, i); if (!ent->is_valid) { set_bit(i, free_bins); @@ -1995,10 +1992,7 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, ent.is_static = true; ent.is_age = 
false; memcpy(ent.mac, addr, ETH_ALEN); - if (is5325(dev) || is5365(dev)) - b53_arl_write_entry_25(dev, &ent, idx); - else - b53_arl_write_entry_95(dev, &ent, idx); + b53_arl_write_entry(dev, &ent, idx); return b53_arl_rw_op(dev, 0); } @@ -2109,17 +2103,6 @@ static void b53_arl_search_read_95(struct b53_device *dev, u8 idx, b53_arl_to_entry(ent, mac_vid, fwd_entry); } -static void b53_arl_search_rd(struct b53_device *dev, u8 idx, - struct b53_arl_entry *ent) -{ - if (is5325(dev)) - b53_arl_search_read_25(dev, idx, ent); - else if (is5365(dev)) - b53_arl_search_read_65(dev, idx, ent); - else - b53_arl_search_read_95(dev, idx, ent); -} - static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, dsa_fdb_dump_cb_t *cb, void *data) { @@ -2153,13 +2136,13 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, if (ret) break; - b53_arl_search_rd(priv, 0, &results[0]); + b53_arl_search_read(priv, 0, &results[0]); ret = b53_fdb_copy(port, &results[0], cb, data); if (ret) break; if (results_per_hit == 2) { - b53_arl_search_rd(priv, 1, &results[1]); + b53_arl_search_read(priv, 1, &results[1]); ret = b53_fdb_copy(port, &results[1], cb, data); if (ret) break; @@ -2688,6 +2671,24 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_change_mtu = b53_change_mtu, }; +static const struct b53_arl_ops b53_arl_ops_25 = { + .arl_read_entry = b53_arl_read_entry_25, + .arl_write_entry = b53_arl_write_entry_25, + .arl_search_read = b53_arl_search_read_25, +}; + +static const struct b53_arl_ops b53_arl_ops_65 = { + .arl_read_entry = b53_arl_read_entry_25, + .arl_write_entry = b53_arl_write_entry_25, + .arl_search_read = b53_arl_search_read_65, +}; + +static const struct b53_arl_ops b53_arl_ops_95 = { + .arl_read_entry = b53_arl_read_entry_95, + .arl_write_entry = b53_arl_write_entry_95, + .arl_search_read = b53_arl_search_read_95, +}; + struct b53_chip_data { u32 chip_id; const char *dev_name; @@ -2701,6 +2702,7 @@ struct b53_chip_data { u8 duplex_reg; u8 jumbo_pm_reg; u8 
jumbo_size_reg; + const struct b53_arl_ops *arl_ops; }; #define B53_VTA_REGS \ @@ -2720,6 +2722,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .arl_buckets = 1024, .imp_port = 5, .duplex_reg = B53_DUPLEX_STAT_FE, + .arl_ops = &b53_arl_ops_25, }, { .chip_id = BCM5365_DEVICE_ID, @@ -2730,6 +2733,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .arl_buckets = 1024, .imp_port = 5, .duplex_reg = B53_DUPLEX_STAT_FE, + .arl_ops = &b53_arl_ops_65, }, { .chip_id = BCM5389_DEVICE_ID, @@ -2743,6 +2747,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM5395_DEVICE_ID, @@ -2756,6 +2761,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM5397_DEVICE_ID, @@ -2769,6 +2775,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM5398_DEVICE_ID, @@ -2782,6 +2789,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53101_DEVICE_ID, @@ -2795,6 +2803,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53115_DEVICE_ID, @@ -2808,6 +2817,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { 
.chip_id = BCM53125_DEVICE_ID, @@ -2821,6 +2831,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53128_DEVICE_ID, @@ -2834,6 +2845,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM63XX_DEVICE_ID, @@ -2847,6 +2859,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_63XX, .jumbo_pm_reg = B53_JUMBO_PORT_MASK_63XX, .jumbo_size_reg = B53_JUMBO_MAX_SIZE_63XX, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53010_DEVICE_ID, @@ -2860,6 +2873,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53011_DEVICE_ID, @@ -2873,6 +2887,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53012_DEVICE_ID, @@ -2886,6 +2901,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53018_DEVICE_ID, @@ -2899,6 +2915,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53019_DEVICE_ID, @@ -2912,6 +2929,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = 
B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM58XX_DEVICE_ID, @@ -2925,6 +2943,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM583XX_DEVICE_ID, @@ -2938,6 +2957,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, /* Starfighter 2 */ { @@ -2952,6 +2972,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM7445_DEVICE_ID, @@ -2965,6 +2986,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM7278_DEVICE_ID, @@ -2978,6 +3000,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, { .chip_id = BCM53134_DEVICE_ID, @@ -2992,6 +3015,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + .arl_ops = &b53_arl_ops_95, }, }; @@ -3020,6 +3044,7 @@ static int b53_switch_init(struct b53_device *dev) dev->num_vlans = chip->vlans; dev->num_arl_bins = chip->arl_bins; dev->num_arl_buckets = chip->arl_buckets; + dev->arl_ops = chip->arl_ops; break; } } diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 458775f951643..ef2413509b5db 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -58,6 +58,17 
@@ struct b53_io_ops { bool link_up); }; +struct b53_arl_entry; + +struct b53_arl_ops { + void (*arl_read_entry)(struct b53_device *dev, + struct b53_arl_entry *ent, u8 idx); + void (*arl_write_entry)(struct b53_device *dev, + const struct b53_arl_entry *ent, u8 idx); + void (*arl_search_read)(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent); +}; + #define B53_INVALID_LANE 0xff enum { @@ -127,6 +138,7 @@ struct b53_device { struct mutex stats_mutex; struct mutex arl_mutex; const struct b53_io_ops *ops; + const struct b53_arl_ops *arl_ops; /* chip specific data */ u32 chip_id; @@ -371,6 +383,24 @@ static inline void b53_arl_from_entry_25(u64 *mac_vid, *mac_vid |= ARLTBL_AGE_25; } +static inline void b53_arl_read_entry(struct b53_device *dev, + struct b53_arl_entry *ent, u8 idx) +{ + dev->arl_ops->arl_read_entry(dev, ent, idx); +} + +static inline void b53_arl_write_entry(struct b53_device *dev, + const struct b53_arl_entry *ent, u8 idx) +{ + dev->arl_ops->arl_write_entry(dev, ent, idx); +} + +static inline void b53_arl_search_read(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent) +{ + dev->arl_ops->arl_search_read(dev, idx, ent); +} + #ifdef CONFIG_BCM47XX #include <linux/bcm47xx_nvram.h> From 300f78e8b6b7be17c2c78afeded75be68acb1aa7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:48 +0100 Subject: [PATCH 711/867] net: dsa: b53: add support for 5389/5397/5398 ARL entry format BCM5389, BCM5397 and BCM5398 use a different ARL entry format with just a 16 bit fwdentry register, as well as different search control and data offsets. So add appropriate ops for them and switch those chips to use them.
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-8-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 53 ++++++++++++++++++++++++++++++-- drivers/net/dsa/b53/b53_priv.h | 26 ++++++++++++++++ drivers/net/dsa/b53/b53_regs.h | 13 ++++++++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c69022cc85bf3..73ea9adb95b71 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1870,6 +1870,31 @@ static void b53_arl_write_entry_25(struct b53_device *dev, mac_vid); } +static void b53_arl_read_entry_89(struct b53_device *dev, + struct b53_arl_entry *ent, u8 idx) +{ + u64 mac_vid; + u16 fwd_entry; + + b53_read64(dev, B53_ARLIO_PAGE, B53_ARLTBL_MAC_VID_ENTRY(idx), + &mac_vid); + b53_read16(dev, B53_ARLIO_PAGE, B53_ARLTBL_DATA_ENTRY(idx), &fwd_entry); + b53_arl_to_entry_89(ent, mac_vid, fwd_entry); +} + +static void b53_arl_write_entry_89(struct b53_device *dev, + const struct b53_arl_entry *ent, u8 idx) +{ + u32 fwd_entry; + u64 mac_vid; + + b53_arl_from_entry_89(&mac_vid, &fwd_entry, ent); + b53_write64(dev, B53_ARLIO_PAGE, + B53_ARLTBL_MAC_VID_ENTRY(idx), mac_vid); + b53_write16(dev, B53_ARLIO_PAGE, + B53_ARLTBL_DATA_ENTRY(idx), fwd_entry); +} + static void b53_arl_read_entry_95(struct b53_device *dev, struct b53_arl_entry *ent, u8 idx) { @@ -2033,6 +2058,8 @@ static void b53_read_arl_srch_ctl(struct b53_device *dev, u8 *val) if (is5325(dev) || is5365(dev)) offset = B53_ARL_SRCH_CTL_25; + else if (dev->chip_id == BCM5389_DEVICE_ID || is5397_98(dev)) + offset = B53_ARL_SRCH_CTL_89; else offset = B53_ARL_SRCH_CTL; @@ -2045,6 +2072,8 @@ static void b53_write_arl_srch_ctl(struct b53_device *dev, u8 val) if (is5325(dev) || is5365(dev)) offset = B53_ARL_SRCH_CTL_25; + else if (dev->chip_id == BCM5389_DEVICE_ID || is5397_98(dev)) + offset = B53_ARL_SRCH_CTL_89; else 
offset = B53_ARL_SRCH_CTL; @@ -2090,6 +2119,18 @@ static void b53_arl_search_read_65(struct b53_device *dev, u8 idx, b53_arl_to_entry_25(ent, mac_vid); } +static void b53_arl_search_read_89(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent) +{ + u16 fwd_entry; + u64 mac_vid; + + b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSLT_MACVID_89, + &mac_vid); + b53_read16(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSLT_89, &fwd_entry); + b53_arl_to_entry_89(ent, mac_vid, fwd_entry); +} + static void b53_arl_search_read_95(struct b53_device *dev, u8 idx, struct b53_arl_entry *ent) { @@ -2683,6 +2724,12 @@ static const struct b53_arl_ops b53_arl_ops_65 = { .arl_search_read = b53_arl_search_read_65, }; +static const struct b53_arl_ops b53_arl_ops_89 = { + .arl_read_entry = b53_arl_read_entry_89, + .arl_write_entry = b53_arl_write_entry_89, + .arl_search_read = b53_arl_search_read_89, +}; + static const struct b53_arl_ops b53_arl_ops_95 = { .arl_read_entry = b53_arl_read_entry_95, .arl_write_entry = b53_arl_write_entry_95, @@ -2747,7 +2794,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, - .arl_ops = &b53_arl_ops_95, + .arl_ops = &b53_arl_ops_89, }, { .chip_id = BCM5395_DEVICE_ID, @@ -2775,7 +2822,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, - .arl_ops = &b53_arl_ops_95, + .arl_ops = &b53_arl_ops_89, }, { .chip_id = BCM5398_DEVICE_ID, @@ -2789,7 +2836,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, - .arl_ops = &b53_arl_ops_95, + .arl_ops = &b53_arl_ops_89, }, { .chip_id = BCM53101_DEVICE_ID, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index ef2413509b5db..d6d25bb3945b4 100644 
--- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -353,6 +353,18 @@ static inline void b53_arl_to_entry_25(struct b53_arl_entry *ent, ent->vid = mac_vid >> ARLTBL_VID_S_65; } +static inline void b53_arl_to_entry_89(struct b53_arl_entry *ent, + u64 mac_vid, u16 fwd_entry) +{ + memset(ent, 0, sizeof(*ent)); + ent->port = fwd_entry & ARLTBL_DATA_PORT_ID_MASK_89; + ent->is_valid = !!(fwd_entry & ARLTBL_VALID_89); + ent->is_age = !!(fwd_entry & ARLTBL_AGE_89); + ent->is_static = !!(fwd_entry & ARLTBL_STATIC_89); + u64_to_ether_addr(mac_vid, ent->mac); + ent->vid = mac_vid >> ARLTBL_VID_S; +} + static inline void b53_arl_from_entry(u64 *mac_vid, u32 *fwd_entry, const struct b53_arl_entry *ent) { @@ -383,6 +395,20 @@ static inline void b53_arl_from_entry_25(u64 *mac_vid, *mac_vid |= ARLTBL_AGE_25; } +static inline void b53_arl_from_entry_89(u64 *mac_vid, u32 *fwd_entry, + const struct b53_arl_entry *ent) +{ + *mac_vid = ether_addr_to_u64(ent->mac); + *mac_vid |= (u64)(ent->vid & ARLTBL_VID_MASK) << ARLTBL_VID_S; + *fwd_entry = ent->port & ARLTBL_DATA_PORT_ID_MASK_89; + if (ent->is_valid) + *fwd_entry |= ARLTBL_VALID_89; + if (ent->is_static) + *fwd_entry |= ARLTBL_STATIC_89; + if (ent->is_age) + *fwd_entry |= ARLTBL_AGE_89; +} + static inline void b53_arl_read_entry(struct b53_device *dev, struct b53_arl_entry *ent, u8 idx) { diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index c36a3dfb2ee89..c303507d30343 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -346,12 +346,20 @@ #define ARLTBL_STATIC BIT(15) #define ARLTBL_VALID BIT(16) +/* BCM5389 ARL Table Data Entry N Register format (16 bit) */ +#define ARLTBL_DATA_PORT_ID_MASK_89 GENMASK(8, 0) +#define ARLTBL_TC_MASK_89 GENMASK(12, 10) +#define ARLTBL_AGE_89 BIT(13) +#define ARLTBL_STATIC_89 BIT(14) +#define ARLTBL_VALID_89 BIT(15) + /* Maximum number of bin entries in the ARL for all switches */ #define B53_ARLTBL_MAX_BIN_ENTRIES 4 
/* ARL Search Control Register (8 bit) */ #define B53_ARL_SRCH_CTL 0x50 #define B53_ARL_SRCH_CTL_25 0x20 +#define B53_ARL_SRCH_CTL_89 0x30 #define ARL_SRCH_VLID BIT(0) #define ARL_SRCH_STDN BIT(7) @@ -359,10 +367,12 @@ #define B53_ARL_SRCH_ADDR 0x51 #define B53_ARL_SRCH_ADDR_25 0x22 #define B53_ARL_SRCH_ADDR_65 0x24 +#define B53_ARL_SRCH_ADDR_89 0x31 #define ARL_ADDR_MASK GENMASK(14, 0) /* ARL Search MAC/VID Result (64 bit) */ #define B53_ARL_SRCH_RSTL_0_MACVID 0x60 +#define B53_ARL_SRCH_RSLT_MACVID_89 0x33 /* Single register search result on 5325 */ #define B53_ARL_SRCH_RSTL_0_MACVID_25 0x24 @@ -372,6 +382,9 @@ /* ARL Search Data Result (32 bit) */ #define B53_ARL_SRCH_RSTL_0 0x68 +/* BCM5389 ARL Search Data Result (16 bit) */ +#define B53_ARL_SRCH_RSLT_89 0x3b + #define B53_ARL_SRCH_RSTL_MACVID(x) (B53_ARL_SRCH_RSTL_0_MACVID + ((x) * 0x10)) #define B53_ARL_SRCH_RSTL(x) (B53_ARL_SRCH_RSTL_0 + ((x) * 0x10)) From 2b3013ac03028a2364d8779719bb6bfbc0212435 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 7 Nov 2025 09:07:49 +0100 Subject: [PATCH 712/867] net: dsa: b53: add support for bcm63xx ARL entry format The ARL registers of BCM63XX embedded switches are somewhat unique. The normal ARL table access registers have the same format as BCM5389, but the ARL search registers differ: * SRCH_CTL is at the same offset of BCM5389, but 16 bits wide. It does not have more fields, just needs to be accessed by a 16 bit read. * SRCH_RSLT_MACVID and SRCH_RSLT are aligned to 32 bit, and have shifted offsets. * SRCH_RSLT has a different format than the normal ARL data entry register. * There is only one set of ENTRY_N registers, implying a 1 bin layout. So add appropriate ops for bcm63xx and let it use it. 
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251107080749.26936-9-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 44 +++++++++++++++++++++++++++----- drivers/net/dsa/b53/b53_priv.h | 15 +++++++++++ drivers/net/dsa/b53/b53_regs.h | 9 +++++++ 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 73ea9adb95b71..72c85cd34a4ee 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2058,12 +2058,20 @@ static void b53_read_arl_srch_ctl(struct b53_device *dev, u8 *val) if (is5325(dev) || is5365(dev)) offset = B53_ARL_SRCH_CTL_25; - else if (dev->chip_id == BCM5389_DEVICE_ID || is5397_98(dev)) + else if (dev->chip_id == BCM5389_DEVICE_ID || is5397_98(dev) || + is63xx(dev)) offset = B53_ARL_SRCH_CTL_89; else offset = B53_ARL_SRCH_CTL; - b53_read8(dev, B53_ARLIO_PAGE, offset, val); + if (is63xx(dev)) { + u16 val16; + + b53_read16(dev, B53_ARLIO_PAGE, offset, &val16); + *val = val16 & 0xff; + } else { + b53_read8(dev, B53_ARLIO_PAGE, offset, val); + } } static void b53_write_arl_srch_ctl(struct b53_device *dev, u8 val) @@ -2072,12 +2080,16 @@ static void b53_write_arl_srch_ctl(struct b53_device *dev, u8 val) if (is5325(dev) || is5365(dev)) offset = B53_ARL_SRCH_CTL_25; - else if (dev->chip_id == BCM5389_DEVICE_ID || is5397_98(dev)) + else if (dev->chip_id == BCM5389_DEVICE_ID || is5397_98(dev) || + is63xx(dev)) offset = B53_ARL_SRCH_CTL_89; else offset = B53_ARL_SRCH_CTL; - b53_write8(dev, B53_ARLIO_PAGE, offset, val); + if (is63xx(dev)) + b53_write16(dev, B53_ARLIO_PAGE, offset, val); + else + b53_write8(dev, B53_ARLIO_PAGE, offset, val); } static int b53_arl_search_wait(struct b53_device *dev) @@ -2131,6 +2143,18 @@ static void b53_arl_search_read_89(struct b53_device *dev, u8 idx, b53_arl_to_entry_89(ent, mac_vid, fwd_entry); } +static void 
b53_arl_search_read_63xx(struct b53_device *dev, u8 idx, + struct b53_arl_entry *ent) +{ + u16 fwd_entry; + u64 mac_vid; + + b53_read64(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSLT_MACVID_63XX, + &mac_vid); + b53_read16(dev, B53_ARLIO_PAGE, B53_ARL_SRCH_RSLT_63XX, &fwd_entry); + b53_arl_search_to_entry_63xx(ent, mac_vid, fwd_entry); +} + static void b53_arl_search_read_95(struct b53_device *dev, u8 idx, struct b53_arl_entry *ent) { @@ -2730,6 +2754,12 @@ static const struct b53_arl_ops b53_arl_ops_89 = { .arl_search_read = b53_arl_search_read_89, }; +static const struct b53_arl_ops b53_arl_ops_63xx = { + .arl_read_entry = b53_arl_read_entry_89, + .arl_write_entry = b53_arl_write_entry_89, + .arl_search_read = b53_arl_search_read_63xx, +}; + static const struct b53_arl_ops b53_arl_ops_95 = { .arl_read_entry = b53_arl_read_entry_95, .arl_write_entry = b53_arl_write_entry_95, @@ -2899,14 +2929,14 @@ static const struct b53_chip_data b53_switch_chips[] = { .dev_name = "BCM63xx", .vlans = 4096, .enabled_ports = 0, /* pdata must provide them */ - .arl_bins = 4, - .arl_buckets = 1024, + .arl_bins = 1, + .arl_buckets = 4096, .imp_port = 8, .vta_regs = B53_VTA_REGS_63XX, .duplex_reg = B53_DUPLEX_STAT_63XX, .jumbo_pm_reg = B53_JUMBO_PORT_MASK_63XX, .jumbo_size_reg = B53_JUMBO_MAX_SIZE_63XX, - .arl_ops = &b53_arl_ops_95, + .arl_ops = &b53_arl_ops_63xx, }, { .chip_id = BCM53010_DEVICE_ID, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index d6d25bb3945b4..2bfd0e7c95c98 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -409,6 +409,21 @@ static inline void b53_arl_from_entry_89(u64 *mac_vid, u32 *fwd_entry, *fwd_entry |= ARLTBL_AGE_89; } +static inline void b53_arl_search_to_entry_63xx(struct b53_arl_entry *ent, + u64 mac_vid, u16 fwd_entry) +{ + memset(ent, 0, sizeof(*ent)); + u64_to_ether_addr(mac_vid, ent->mac); + ent->vid = mac_vid >> ARLTBL_VID_S; + + ent->port = fwd_entry & ARL_SRST_PORT_ID_MASK_63XX; + ent->port 
>>= 1; + + ent->is_age = !!(fwd_entry & ARL_SRST_AGE_63XX); + ent->is_static = !!(fwd_entry & ARL_SRST_STATIC_63XX); + ent->is_valid = 1; +} + static inline void b53_arl_read_entry(struct b53_device *dev, struct b53_arl_entry *ent, u8 idx) { diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index c303507d30343..69ebbec932f65 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -368,11 +368,13 @@ #define B53_ARL_SRCH_ADDR_25 0x22 #define B53_ARL_SRCH_ADDR_65 0x24 #define B53_ARL_SRCH_ADDR_89 0x31 +#define B53_ARL_SRCH_ADDR_63XX 0x32 #define ARL_ADDR_MASK GENMASK(14, 0) /* ARL Search MAC/VID Result (64 bit) */ #define B53_ARL_SRCH_RSTL_0_MACVID 0x60 #define B53_ARL_SRCH_RSLT_MACVID_89 0x33 +#define B53_ARL_SRCH_RSLT_MACVID_63XX 0x34 /* Single register search result on 5325 */ #define B53_ARL_SRCH_RSTL_0_MACVID_25 0x24 @@ -388,6 +390,13 @@ #define B53_ARL_SRCH_RSTL_MACVID(x) (B53_ARL_SRCH_RSTL_0_MACVID + ((x) * 0x10)) #define B53_ARL_SRCH_RSTL(x) (B53_ARL_SRCH_RSTL_0 + ((x) * 0x10)) +/* 63XX ARL Search Data Result (16 bit) */ +#define B53_ARL_SRCH_RSLT_63XX 0x3c +#define ARL_SRST_PORT_ID_MASK_63XX GENMASK(9, 1) +#define ARL_SRST_TC_MASK_63XX GENMASK(13, 11) +#define ARL_SRST_AGE_63XX BIT(14) +#define ARL_SRST_STATIC_63XX BIT(15) + /************************************************************************* * IEEE 802.1X Registers *************************************************************************/ From 23c52b58cc38e398292f125e68b717001344444c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 02:36:59 -0800 Subject: [PATCH 713/867] tg3: Fix num of RX queues being reported by ethtool Using num_online_cpus() to report number of queues is actually not correct, as reported by Michael[1]. netif_get_num_default_rss_queues() was used to replace num_online_cpus() in the past, but tg3 ethtool callbacks didn't get converted. Doing it now. 
Link: https://lore.kernel.org/all/CACKFLim7ruspmqvjr6bNRq5Z_XXVk3vVaLZOons7kMCzsEG23A@mail.gmail.com/#t [1] Signed-off-by: Breno Leitao Suggested-by: Michael Chan Reviewed-by: Michael Chan Link: https://patch.msgid.link/20251107-tg3_counts-v1-1-337fe5c8ccb7@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index fa58c3ffceb06..e21f7c6a6de70 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12729,7 +12729,7 @@ static u32 tg3_get_rx_ring_count(struct net_device *dev) if (netif_running(tp->dev)) return tp->rxq_cnt; - return min(num_online_cpus(), TG3_RSS_MAX_NUM_QS); + return min_t(u32, netif_get_num_default_rss_queues(), tp->rxq_max); } static u32 tg3_get_rxfh_indir_size(struct net_device *dev) From 41d0c31be29fdee2535028ce70a6661e3a67bb25 Mon Sep 17 00:00:00 2001 From: Zahari Doychev Date: Thu, 6 Nov 2025 16:15:28 +0100 Subject: [PATCH 714/867] tools: ynl: call nested attribute free function for indexed arrays When freeing indexed arrays, the corresponding free function should be called for each entry of the indexed array. For example, for 'struct tc_act_attrs' 'tc_act_attrs_free(...)' needs to be called for each entry. Previously, memory leaks were reported when enabling the ASAN analyzer.
================================================================= ==874==ERROR: LeakSanitizer: detected memory leaks Direct leak of 24 byte(s) in 1 object(s) allocated from: #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67 #1 0x55c98db048af in tc_act_attrs_set_options_vlan_parms ../generated/tc-user.h:2813 #2 0x55c98db048af in main ./linux/tools/net/ynl/samples/tc-filter-add.c:71 Direct leak of 24 byte(s) in 1 object(s) allocated from: #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67 #1 0x55c98db04a93 in tc_act_attrs_set_options_vlan_parms ../generated/tc-user.h:2813 #2 0x55c98db04a93 in main ./linux/tools/net/ynl/samples/tc-filter-add.c:74 Direct leak of 10 byte(s) in 2 object(s) allocated from: #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67 #1 0x55c98db0527d in tc_act_attrs_set_kind ../generated/tc-user.h:1622 SUMMARY: AddressSanitizer: 58 byte(s) leaked in 4 allocation(s). The following diff illustrates the changes introduced compared to the previous version of the code. 
void tc_flower_attrs_free(struct tc_flower_attrs *obj) { + unsigned int i; + free(obj->indev); + for (i = 0; i < obj->_count.act; i++) + tc_act_attrs_free(&obj->act[i]); free(obj->act); free(obj->key_eth_dst); free(obj->key_eth_dst_mask); Signed-off-by: Zahari Doychev Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20251106151529.453026-3-zahari.doychev@linux.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 58086b1010573..aadeb3abcad85 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -861,6 +861,18 @@ def _setter_lines(self, ri, member, presence): return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] + def free_needs_iter(self): + return self.sub_type == 'nest' + + def _free_lines(self, ri, var, ref): + lines = [] + if self.sub_type == 'nest': + lines += [ + f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + ] + lines += f"free({var}->{ref}{self.c_name});", + return lines class TypeNestTypeValue(Type): def _complex_member_type(self, ri): From 7ff14c52049eafecdd72cd0a12cae6905876566a Mon Sep 17 00:00:00 2001 From: Simon Schippers Date: Thu, 6 Nov 2025 18:56:15 +0100 Subject: [PATCH 715/867] usbnet: Add support for Byte Queue Limits (BQL) In the current implementation, usbnet uses a fixed tx_qlen of: USB2: 60 * 1518 bytes = 91.08 KB USB3: 60 * 5 * 1518 bytes = 454.80 KB Such large transmit queues can be problematic, especially for cellular modems. For example, with a typical cellular link speed of 10 Mbit/s, a fully occupied USB3 transmit queue results in: 454.80 KB / (10 Mbit/s / 8 bit/byte) = 363.84 ms of additional latency.
This patch adds support for Byte Queue Limits (BQL) [1] to dynamically manage the transmit queue size and reduce latency without sacrificing throughput. Testing was performed on various devices using the usbnet driver for packet transmission: - DELOCK 66045: USB3 to 2.5 GbE adapter (ax88179_178a) - DELOCK 61969: USB2 to 1 GbE adapter (asix) - Quectel RM520: 5G modem (qmi_wwan) - USB2 Android tethering (cdc_ncm) No performance degradation was observed for iperf3 TCP or UDP traffic, while latency for a prioritized ping application was significantly reduced. For example, using the USB3 to 2.5 GbE adapter, which was fully utilized by iperf3 UDP traffic, the prioritized ping was improved from 1.6 ms to 0.6 ms. With the same setup but with a 100 Mbit/s Ethernet connection, the prioritized ping was improved from 35 ms to 5 ms. [1] https://lwn.net/Articles/469652/ Signed-off-by: Simon Schippers Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106175615.26948-1-simon.schippers@tu-dortmund.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 11 +++++++++++ include/linux/usb/usbnet.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index f3087fb62f4f8..3d10cf791c51c 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -831,6 +831,7 @@ int usbnet_stop(struct net_device *net) clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); + netdev_reset_queue(net); netif_info(dev, ifdown, dev->net, "stop stats: rx/tx %lu/%lu, errs %lu/%lu\n", @@ -939,6 +940,7 @@ int usbnet_open(struct net_device *net) } set_bit(EVENT_DEV_OPEN, &dev->flags); + netdev_reset_queue(net); netif_start_queue (net); netif_info(dev, ifup, dev->net, "open: enable queueing (rx %d, tx %d) mtu %d %s framing\n", @@ -1500,6 +1502,7 @@ netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, struct net_device *net) case 0: netif_trans_update(net); __usbnet_queue_skb(&dev->txq, skb, tx_start); + netdev_sent_queue(net, 
skb->len); if (dev->txq.qlen >= TX_QLEN (dev)) netif_stop_queue (net); } @@ -1563,6 +1566,7 @@ static inline void usb_free_skb(struct sk_buff *skb) static void usbnet_bh(struct timer_list *t) { struct usbnet *dev = timer_container_of(dev, t, delay); + unsigned int bytes_compl = 0, pkts_compl = 0; struct sk_buff *skb; struct skb_data *entry; @@ -1574,6 +1578,8 @@ static void usbnet_bh(struct timer_list *t) usb_free_skb(skb); continue; case tx_done: + bytes_compl += skb->len; + pkts_compl++; kfree(entry->urb->sg); fallthrough; case rx_cleanup: @@ -1584,6 +1590,10 @@ static void usbnet_bh(struct timer_list *t) } } + spin_lock_bh(&dev->bql_spinlock); + netdev_completed_queue(dev->net, pkts_compl, bytes_compl); + spin_unlock_bh(&dev->bql_spinlock); + /* restart RX again after disabling due to high error rate */ clear_bit(EVENT_RX_KILL, &dev->flags); @@ -1755,6 +1765,7 @@ usbnet_probe(struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); + spin_lock_init(&dev->bql_spinlock); INIT_WORK(&dev->bh_work, usbnet_bh_work); INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index a2d54122823da..2945923a8a958 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -14,6 +14,7 @@ #include #include #include +#include /* interface from usbnet core to each USB networking link we handle */ struct usbnet { @@ -59,6 +60,7 @@ struct usbnet { struct mutex interrupt_mutex; struct usb_anchor deferred; struct work_struct bh_work; + spinlock_t bql_spinlock; struct work_struct kevent; unsigned long flags; From 2dd63c36453408583c2c37a742705a0232612c57 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:28:34 +0000 Subject: [PATCH 716/867] net: stmmac: ingenic: move ingenic_mac_init() Move ingenic_mac_init() to between variant specific 
set_mode() implementations and ingenic_mac_probe(). No code changes. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHpi-0000000Djqp-4910@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index c1670f6bae145..8d0627055799b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -71,20 +71,6 @@ struct ingenic_soc_info { int (*set_mode)(struct plat_stmmacenet_data *plat_dat); }; -static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) -{ - struct ingenic_mac *mac = bsp_priv; - int ret; - - if (mac->soc_info->set_mode) { - ret = mac->soc_info->set_mode(mac->plat_dat); - if (ret) - return ret; - } - - return 0; -} - static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) { struct ingenic_mac *mac = plat_dat->bsp_priv; @@ -234,6 +220,20 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } +static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) +{ + struct ingenic_mac *mac = bsp_priv; + int ret; + + if (mac->soc_info->set_mode) { + ret = mac->soc_info->set_mode(mac->plat_dat); + if (ret) + return ret; + } + + return 0; +} + static int ingenic_mac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; From 307a575775fd06b1a63ec017a63a82485bedf62f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:28:40 +0000 Subject: [PATCH 717/867] net: stmmac: ingenic: simplify jz4775 mac_set_mode() All paths configure the transmit clock as an input. Move this out of the switch() statement to simplify the code. 
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHpo-0000000Djqv-0RD4@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 8d0627055799b..c6c82f277f627 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -78,20 +78,17 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_MII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_MII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_MII\n"); break; case PHY_INTERFACE_MODE_GMII: - val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_GMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_GMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_GMII\n"); break; case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -99,8 +96,7 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RGMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, 
MACPHYC_PHY_INFT_RGMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n"); break; @@ -110,6 +106,8 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } + val |= FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT); + /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } From da6e9fd1046f1b936d8dd1db738ffb06f21851c0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:28:45 +0000 Subject: [PATCH 718/867] net: stmmac: ingenic: use PHY_INTF_SEL_x to select PHY interface Use the common dwmac definitions for the PHY interface selection field. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHpt-0000000Djr1-0wwr@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index c6c82f277f627..5de2bd984d341 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -35,10 +35,10 @@ #define MACPHYC_RX_DELAY_MASK GENMASK(10, 4) #define MACPHYC_SOFT_RST_MASK GENMASK(3, 3) #define MACPHYC_PHY_INFT_MASK GENMASK(2, 0) -#define MACPHYC_PHY_INFT_RMII 0x4 -#define MACPHYC_PHY_INFT_RGMII 0x1 -#define MACPHYC_PHY_INFT_GMII 0x0 -#define MACPHYC_PHY_INFT_MII 0x0 +#define MACPHYC_PHY_INFT_RMII PHY_INTF_SEL_RMII +#define MACPHYC_PHY_INFT_RGMII PHY_INTF_SEL_RGMII +#define MACPHYC_PHY_INFT_GMII PHY_INTF_SEL_GMII_MII +#define MACPHYC_PHY_INFT_MII PHY_INTF_SEL_GMII_MII #define MACPHYC_TX_DELAY_PS_MAX 2496 #define MACPHYC_TX_DELAY_PS_MIN 20 From dbf99dc7d166944df4af1f52351a355512e1c573 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:28:50 +0000 Subject: [PATCH 719/867] 
net: stmmac: ingenic: use PHY_INTF_SEL_x directly Use the PHY_INTF_SEL_x values directly in each of the mac_set_mode methods rather than the driver private MACPHYC_PHY_INFT_x definitions. Remove the MACPHYC_PHY_INFT_x definitions. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHpy-0000000Djr7-1R1m@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 5de2bd984d341..b56d7ada19394 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -35,10 +35,6 @@ #define MACPHYC_RX_DELAY_MASK GENMASK(10, 4) #define MACPHYC_SOFT_RST_MASK GENMASK(3, 3) #define MACPHYC_PHY_INFT_MASK GENMASK(2, 0) -#define MACPHYC_PHY_INFT_RMII PHY_INTF_SEL_RMII -#define MACPHYC_PHY_INFT_RGMII PHY_INTF_SEL_RGMII -#define MACPHYC_PHY_INFT_GMII PHY_INTF_SEL_GMII_MII -#define MACPHYC_PHY_INFT_MII PHY_INTF_SEL_GMII_MII #define MACPHYC_TX_DELAY_PS_MAX 2496 #define MACPHYC_TX_DELAY_PS_MIN 20 @@ -78,17 +74,17 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_MII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_GMII_MII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_MII\n"); break; case PHY_INTERFACE_MODE_GMII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_GMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_GMII_MII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_GMII\n"); break; case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, 
PHY_INTF_SEL_RMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -96,7 +92,7 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RGMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RGMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n"); break; @@ -138,7 +134,7 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat) switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -160,7 +156,7 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat) switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); + FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -183,7 +179,7 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); + FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -191,7 +187,7 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RGMII); + val = 
FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RGMII); if (mac->tx_delay == 0) val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN); From 14497aaa5eb63f7d77cf72c281afe330baea970b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:28:55 +0000 Subject: [PATCH 720/867] net: stmmac: ingenic: prep PHY_INTF_SEL_x field after switch() Move the preparation of the PHY_INTF_SEL_x bitfield out of the switch() statement such that it only appears once. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHq3-0000000DjrD-1u8O@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index b56d7ada19394..6680f7d3a4690 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -71,20 +71,21 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; + u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_GMII_MII); + phy_intf_sel = PHY_INTF_SEL_GMII_MII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_MII\n"); break; case PHY_INTERFACE_MODE_GMII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_GMII_MII); + phy_intf_sel = PHY_INTF_SEL_GMII_MII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_GMII\n"); break; case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); + phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -92,7 +93,7 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) 
case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RGMII); + phy_intf_sel = PHY_INTF_SEL_RGMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n"); break; @@ -102,7 +103,8 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } - val |= FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT); + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel) | + FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT); /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); @@ -131,10 +133,11 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; + u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); + phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -144,6 +147,8 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); + /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } @@ -152,11 +157,12 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; + u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); + val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII); + phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -166,6 +172,8 @@ static int x1830_mac_set_mode(struct 
plat_stmmacenet_data *plat_dat) return -EINVAL; } + val |= FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); + /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } @@ -174,12 +182,13 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; + u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | - FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN) | - FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RMII); + FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN); + phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -187,7 +196,8 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, PHY_INTF_SEL_RGMII); + val = 0; + phy_intf_sel = PHY_INTF_SEL_RGMII; if (mac->tx_delay == 0) val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN); @@ -210,6 +220,8 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } + val |= FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); + /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } From 0e2fa91c55c05f441417489957a1f909ff6c9aa1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:29:00 +0000 Subject: [PATCH 721/867] net: stmmac: ingenic: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result against the SoC specific supported phy_intf_sel values, and pass into the SoC specific set_mode() methods, replacing the local phy_intf_sel variable. 
This provides the value for the MACPHYC_PHY_INFT_MASK field. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHq8-0000000DjrJ-2NRK@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 6680f7d3a4690..79735a476e86d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -64,28 +64,27 @@ struct ingenic_soc_info { enum ingenic_mac_version version; u32 mask; - int (*set_mode)(struct plat_stmmacenet_data *plat_dat); + int (*set_mode)(struct plat_stmmacenet_data *plat_dat, u8 phy_intf_sel); + + u8 valid_phy_intf_sel; }; -static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) +static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_MII\n"); break; case PHY_INTERFACE_MODE_GMII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_GMII\n"); break; case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -93,7 +92,6 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - phy_intf_sel = PHY_INTF_SEL_RGMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n"); break; @@ -110,7 +108,8 @@ static int jz4775_mac_set_mode(struct 
plat_stmmacenet_data *plat_dat) return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } -static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) +static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct ingenic_mac *mac = plat_dat->bsp_priv; @@ -129,15 +128,14 @@ static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, 0); } -static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat) +static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -153,16 +151,15 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } -static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat) +static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - u8 phy_intf_sel; switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII); - phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -178,17 +175,16 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat) return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } -static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) +static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - u8 phy_intf_sel; switch (plat_dat->phy_interface) { case 
PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN); - phy_intf_sel = PHY_INTF_SEL_RMII; dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; @@ -197,8 +193,6 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: val = 0; - phy_intf_sel = PHY_INTF_SEL_RGMII; - if (mac->tx_delay == 0) val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN); else @@ -229,10 +223,21 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) { struct ingenic_mac *mac = bsp_priv; - int ret; + phy_interface_t interface; + int phy_intf_sel, ret; if (mac->soc_info->set_mode) { - ret = mac->soc_info->set_mode(mac->plat_dat); + interface = mac->plat_dat->phy_interface; + + phy_intf_sel = stmmac_get_phy_intf_sel(interface); + if (phy_intf_sel < 0 || phy_intf_sel >= BITS_PER_BYTE || + ~mac->soc_info->valid_phy_intf_sel & BIT(phy_intf_sel)) { + dev_err(mac->dev, "unsupported interface %s\n", + phy_modes(interface)); + return phy_intf_sel < 0 ? 
phy_intf_sel : -EINVAL; + } + + ret = mac->soc_info->set_mode(mac->plat_dat, phy_intf_sel); if (ret) return ret; } @@ -309,6 +314,9 @@ static struct ingenic_soc_info jz4775_soc_info = { .mask = MACPHYC_TXCLK_SEL_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK, .set_mode = jz4775_mac_set_mode, + .valid_phy_intf_sel = BIT(PHY_INTF_SEL_GMII_MII) | + BIT(PHY_INTF_SEL_RGMII) | + BIT(PHY_INTF_SEL_RMII), }; static struct ingenic_soc_info x1000_soc_info = { @@ -316,6 +324,7 @@ static struct ingenic_soc_info x1000_soc_info = { .mask = MACPHYC_SOFT_RST_MASK, .set_mode = x1000_mac_set_mode, + .valid_phy_intf_sel = BIT(PHY_INTF_SEL_RMII), }; static struct ingenic_soc_info x1600_soc_info = { @@ -323,6 +332,7 @@ static struct ingenic_soc_info x1600_soc_info = { .mask = MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK, .set_mode = x1600_mac_set_mode, + .valid_phy_intf_sel = BIT(PHY_INTF_SEL_RMII), }; static struct ingenic_soc_info x1830_soc_info = { @@ -330,6 +340,7 @@ static struct ingenic_soc_info x1830_soc_info = { .mask = MACPHYC_MODE_SEL_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK, .set_mode = x1830_mac_set_mode, + .valid_phy_intf_sel = BIT(PHY_INTF_SEL_RMII), }; static struct ingenic_soc_info x2000_soc_info = { @@ -338,6 +349,8 @@ static struct ingenic_soc_info x2000_soc_info = { MACPHYC_RX_DELAY_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK, .set_mode = x2000_mac_set_mode, + .valid_phy_intf_sel = BIT(PHY_INTF_SEL_RGMII) | + BIT(PHY_INTF_SEL_RMII), }; static const struct of_device_id ingenic_mac_of_matches[] = { From 35147b5c9e414c1ced1903c0447609643d03e2f3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:29:05 +0000 Subject: [PATCH 722/867] net: stmmac: ingenic: move "MAC PHY control register" debug Move the printing of the MAC PHY control register interface mode setting into ingenic_mac_init(), and use phy_modes() to print the string rather than using the enum name.
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHqD-0000000DjrP-3aaU@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 79735a476e86d..539513890db1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -77,22 +77,12 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat, switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_MII\n"); - break; - case PHY_INTERFACE_MODE_GMII: - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_GMII\n"); - break; - case PHY_INTERFACE_MODE_RMII: - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); - break; - case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n"); break; default: @@ -115,7 +105,6 @@ static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; default: @@ -136,7 +125,6 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat, switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; default: @@ -160,7 +148,6 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat, switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII); - 
dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; default: @@ -185,7 +172,6 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN); - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; case PHY_INTERFACE_MODE_RGMII: @@ -205,7 +191,6 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, val |= FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_DELAY) | FIELD_PREP(MACPHYC_RX_DELAY_MASK, (mac->rx_delay + 9750) / 19500 - 1); - dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n"); break; default: @@ -237,6 +222,9 @@ static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) return phy_intf_sel < 0 ? phy_intf_sel : -EINVAL; } + dev_dbg(mac->dev, "MAC PHY control register: interface %s\n", + phy_modes(interface)); + ret = mac->soc_info->set_mode(mac->plat_dat, phy_intf_sel); if (ret) return ret; From 608975d4d791644d15b286b2b7bfcd25359c979f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:29:10 +0000 Subject: [PATCH 723/867] net: stmmac: ingenic: simplify mac_set_mode() methods x1000, x1600 and x1830 only accept RMII mode. PHY_INTF_SEL_RMII is only selected with PHY_INTERFACE_MODE_RMII, and PHY_INTF_SEL_RMII has been validated by the SoC's .valid_phy_intf_sel bitmask. Thus, checking the interface mode in these functions becomes unnecessary. Remove these. jz4775 is similar, except for a greater set of PHY_INTF_SEL_x values. Also remove the switch statement here.
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHqI-0000000DjrV-3ygL@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 50 +------------------ 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 539513890db1f..7b2576fbb1e12 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -75,22 +75,6 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat, struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_GMII: - case PHY_INTERFACE_MODE_RMII: - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_TXID: - case PHY_INTERFACE_MODE_RGMII_RXID: - break; - - default: - dev_err(mac->dev, "Unsupported interface %s\n", - phy_modes(plat_dat->phy_interface)); - return -EINVAL; - } - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel) | FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT); @@ -103,16 +87,6 @@ static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, { struct ingenic_mac *mac = plat_dat->bsp_priv; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RMII: - break; - - default: - dev_err(mac->dev, "Unsupported interface %s\n", - phy_modes(plat_dat->phy_interface)); - return -EINVAL; - } - /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, 0); } @@ -123,16 +97,6 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat, struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RMII: - break; - - default: - dev_err(mac->dev, "Unsupported interface %s\n", 
- phy_modes(plat_dat->phy_interface)); - return -EINVAL; - } - val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); /* Update MAC PHY control register */ @@ -145,18 +109,8 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat, struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII); - break; - - default: - dev_err(mac->dev, "Unsupported interface %s\n", - phy_modes(plat_dat->phy_interface)); - return -EINVAL; - } - - val |= FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); + val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) | + FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); From 2284cca0bced3cd8c1cb643d1ae6923a75e47265 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:29:16 +0000 Subject: [PATCH 724/867] net: stmmac: ingenic: simplify x2000 mac_set_mode() As per the previous commit, we have validated that the phy_intf_sel value is one that is permissible for this SoC, so there is no need to handle invalid PHY interface modes. We can also apply the other configuration based upon the phy_intf_sel value rather than the PHY interface mode. 
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHqO-0000000Djrb-0DPN@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 7b2576fbb1e12..eb5744e0b9ea1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -122,39 +122,25 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RMII: - val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | - FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN); - break; - - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_TXID: - case PHY_INTERFACE_MODE_RGMII_RXID: - val = 0; + val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); + + if (phy_intf_sel == PHY_INTF_SEL_RMII) { + val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | + FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN); + } else if (phy_intf_sel == PHY_INTF_SEL_RGMII) { if (mac->tx_delay == 0) val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN); else val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_DELAY) | - FIELD_PREP(MACPHYC_TX_DELAY_MASK, (mac->tx_delay + 9750) / 19500 - 1); + FIELD_PREP(MACPHYC_TX_DELAY_MASK, (mac->tx_delay + 9750) / 19500 - 1); if (mac->rx_delay == 0) val |= FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN); else val |= FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_DELAY) | FIELD_PREP(MACPHYC_RX_DELAY_MASK, (mac->rx_delay + 9750) / 19500 - 1); - - break; - - default: - dev_err(mac->dev, "Unsupported interface %s\n", - 
phy_modes(plat_dat->phy_interface)); - return -EINVAL; } - val |= FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); - /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } From 9352f74fd13de34468be95291bdb0782aac8ca3d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:29:21 +0000 Subject: [PATCH 725/867] net: stmmac: ingenic: pass ingenic_mac struct rather than plat_dat It no longer makes sense to pass a pointer to struct plat_stmmacenet_data when calling the set_mode() methods to only use it to get a pointer to the ingenic_mac structure that we already had in the caller. Simplify this by passing the struct ingenic_mac pointer. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHqT-0000000Djrh-0ka3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index eb5744e0b9ea1..41a2071262bc7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -64,15 +64,13 @@ struct ingenic_soc_info { enum ingenic_mac_version version; u32 mask; - int (*set_mode)(struct plat_stmmacenet_data *plat_dat, u8 phy_intf_sel); + int (*set_mode)(struct ingenic_mac *mac, u8 phy_intf_sel); u8 valid_phy_intf_sel; }; -static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int jz4775_mac_set_mode(struct ingenic_mac *mac, u8 phy_intf_sel) { - struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel) | @@ -82,19 +80,14 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat, return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } 
-static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int x1000_mac_set_mode(struct ingenic_mac *mac, u8 phy_intf_sel) { - struct ingenic_mac *mac = plat_dat->bsp_priv; - /* Update MAC PHY control register */ return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, 0); } -static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int x1600_mac_set_mode(struct ingenic_mac *mac, u8 phy_intf_sel) { - struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); @@ -103,10 +96,8 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat, return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } -static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int x1830_mac_set_mode(struct ingenic_mac *mac, u8 phy_intf_sel) { - struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) | @@ -116,10 +107,8 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat, return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } -static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat, - u8 phy_intf_sel) +static int x2000_mac_set_mode(struct ingenic_mac *mac, u8 phy_intf_sel) { - struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, phy_intf_sel); @@ -165,7 +154,7 @@ static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) dev_dbg(mac->dev, "MAC PHY control register: interface %s\n", phy_modes(interface)); - ret = mac->soc_info->set_mode(mac->plat_dat, phy_intf_sel); + ret = mac->soc_info->set_mode(mac, phy_intf_sel); if (ret) return ret; } From 34bf68a691227faac490a313b7925ed10cf8de15 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 7 Nov 2025 08:29:26 +0000 Subject: [PATCH 726/867] net: stmmac: 
ingenic: use ->set_phy_intf_sel() Rather than placing the phy_intf_sel() setup in the ->init() method, move it to the new ->set_phy_intf_sel() method. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vHHqY-0000000Djrn-1D6H@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-ingenic.c | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 41a2071262bc7..8e4a30c11db06 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -134,32 +134,21 @@ static int x2000_mac_set_mode(struct ingenic_mac *mac, u8 phy_intf_sel) return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val); } -static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) +static int ingenic_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) { struct ingenic_mac *mac = bsp_priv; - phy_interface_t interface; - int phy_intf_sel, ret; - - if (mac->soc_info->set_mode) { - interface = mac->plat_dat->phy_interface; - - phy_intf_sel = stmmac_get_phy_intf_sel(interface); - if (phy_intf_sel < 0 || phy_intf_sel >= BITS_PER_BYTE || - ~mac->soc_info->valid_phy_intf_sel & BIT(phy_intf_sel)) { - dev_err(mac->dev, "unsupported interface %s\n", - phy_modes(interface)); - return phy_intf_sel < 0 ? 
phy_intf_sel : -EINVAL; - } - dev_dbg(mac->dev, "MAC PHY control register: interface %s\n", - phy_modes(interface)); + if (!mac->soc_info->set_mode) + return 0; - ret = mac->soc_info->set_mode(mac, phy_intf_sel); - if (ret) - return ret; - } + if (phy_intf_sel >= BITS_PER_BYTE || + ~mac->soc_info->valid_phy_intf_sel & BIT(phy_intf_sel)) + return -EINVAL; + + dev_dbg(mac->dev, "MAC PHY control register: interface %s\n", + phy_modes(mac->plat_dat->phy_interface)); - return 0; + return mac->soc_info->set_mode(mac, phy_intf_sel); } static int ingenic_mac_probe(struct platform_device *pdev) @@ -221,7 +210,7 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->plat_dat = plat_dat; plat_dat->bsp_priv = mac; - plat_dat->init = ingenic_mac_init; + plat_dat->set_phy_intf_sel = ingenic_set_phy_intf_sel; return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } From 40fef85ceb9cce2021f7ba5ca6e3212baa193cb3 Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Thu, 6 Nov 2025 11:27:43 -0800 Subject: [PATCH 727/867] gve: Decouple header split from RX buffer length Previously, enabling header split via `gve_set_hsplit_config` also implicitly changed the RX buffer length to 4K (if supported by the device). This coupled two settings that should be orthogonal; this patch removes that side effect. After this change, `gve_set_hsplit_config` only toggles the header split configuration. The RX buffer length is no longer affected and must be configured independently. 
Signed-off-by: Ankit Garg Reviewed-by: Harshitha Ramamurthy Reviewed-by: Jordan Rhee Reviewed-by: Willem de Bruijn Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251106192746.243525-2-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 3 --- drivers/net/ethernet/google/gve/gve_ethtool.c | 2 -- drivers/net/ethernet/google/gve/gve_main.c | 10 ---------- 3 files changed, 15 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index cf95ec25b11a3..c237d00c5ab39 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -59,8 +59,6 @@ #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 -#define GVE_MAX_RX_BUFFER_SIZE 4096 - #define GVE_XDP_RX_BUFFER_SIZE_DQO 4096 #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 @@ -1249,7 +1247,6 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); -u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); bool gve_header_split_supported(const struct gve_priv *priv); int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index b030a84b678ce..db6fc855a511d 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -606,8 +606,6 @@ static int gve_set_ringparam(struct net_device *netdev, } else { /* Set ring params for the next up */ priv->header_split_enabled = rx_alloc_cfg.enable_header_split; - priv->rx_cfg.packet_buffer_size = - rx_alloc_cfg.packet_buffer_size; priv->tx_desc_cnt = tx_alloc_cfg.ring_size; priv->rx_desc_cnt = rx_alloc_cfg.ring_size; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c 
b/drivers/net/ethernet/google/gve/gve_main.c index 29845e8f3c0dc..8d825218965ab 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2041,14 +2041,6 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) priv->tx_timeo_cnt++; } -u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) -{ - if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) - return GVE_MAX_RX_BUFFER_SIZE; - else - return GVE_DEFAULT_RX_BUFFER_SIZE; -} - /* Header split is only supported on DQ RDA queue format. If XDP is enabled, * header split is not allowed. */ @@ -2080,8 +2072,6 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, return 0; rx_alloc_cfg->enable_header_split = enable_hdr_split; - rx_alloc_cfg->packet_buffer_size = - gve_get_pkt_buf_size(priv, enable_hdr_split); return 0; } From 091a3b6ff2b98354270cb9278faad6d17d5aa27d Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Thu, 6 Nov 2025 11:27:44 -0800 Subject: [PATCH 728/867] gve: Use extack to log xdp config verification errors Plumb extack, as it allows us to send more detailed error messages back, and add the 'gve' prefix to the method name per convention. NL_SET_ERR_MSG_FMT_MOD doesn't support format strings longer than 80 chars, so keep the netdev warning with the actual queue count details. 
Signed-off-by: Ankit Garg Reviewed-by: Harshitha Ramamurthy Reviewed-by: Willem de Bruijn Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251106192746.243525-3-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 24 ++++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 8d825218965ab..616182f4fb81b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1707,18 +1707,21 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) return 0; } -static int verify_xdp_configuration(struct net_device *dev) +static int gve_verify_xdp_configuration(struct net_device *dev, + struct netlink_ext_ack *extack) { struct gve_priv *priv = netdev_priv(dev); u16 max_xdp_mtu; if (dev->features & NETIF_F_LRO) { - netdev_warn(dev, "XDP is not supported when LRO is on.\n"); + NL_SET_ERR_MSG_MOD(extack, + "XDP is not supported when LRO is on."); return -EOPNOTSUPP; } if (priv->header_split_enabled) { - netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n"); + NL_SET_ERR_MSG_MOD(extack, + "XDP is not supported when header-data split is enabled."); return -EOPNOTSUPP; } @@ -1727,17 +1730,20 @@ static int verify_xdp_configuration(struct net_device *dev) max_xdp_mtu -= GVE_RX_PAD; if (dev->mtu > max_xdp_mtu) { - netdev_warn(dev, "XDP is not supported for mtu %d.\n", - dev->mtu); + NL_SET_ERR_MSG_FMT_MOD(extack, + "XDP is not supported for mtu %d.", + dev->mtu); return -EOPNOTSUPP; } if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { - netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the 
maximum number of RX/TX queues %d", - priv->rx_cfg.num_queues, - priv->tx_cfg.num_queues, + netdev_warn(dev, + "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d.", + priv->rx_cfg.num_queues, priv->tx_cfg.num_queues, priv->tx_cfg.max_queues); + NL_SET_ERR_MSG_MOD(extack, + "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); return -EINVAL; } return 0; @@ -1748,7 +1754,7 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) struct gve_priv *priv = netdev_priv(dev); int err; - err = verify_xdp_configuration(dev); + err = gve_verify_xdp_configuration(dev, xdp->extack); if (err) return err; switch (xdp->command) { From d235bb213f411ace8317bcca3740a1008628ea9c Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Thu, 6 Nov 2025 11:27:45 -0800 Subject: [PATCH 729/867] gve: Allow ethtool to configure rx_buf_len Add support for getting and setting the RX buffer length via the ethtool ring parameters (`ethtool -g`/`-G`). The driver restricts the allowed buffer length to 2048 (SZ_2K) by default and allows 4096 (SZ_4K) based on device options. As XDP is only supported when the `rx_buf_len` is 2048, the driver now enforces this in two places: 1. In `gve_xdp_set`, rejecting XDP programs if the current buffer length is not 2048. 2. In `gve_set_rx_buf_len_config`, rejecting buffer length changes if XDP is loaded and the new length is not 2048. 
Signed-off-by: Ankit Garg Reviewed-by: Harshitha Ramamurthy Reviewed-by: Jordan Rhee Reviewed-by: Willem de Bruijn Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251106192746.243525-4-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 9 +++++ drivers/net/ethernet/google/gve/gve_ethtool.c | 13 ++++++- drivers/net/ethernet/google/gve/gve_main.c | 39 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index c237d00c5ab39..a33b44c1eb862 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1167,6 +1167,12 @@ static inline bool gve_is_gqi(struct gve_priv *priv) priv->queue_format == GVE_GQI_QPL_FORMAT; } +static inline bool gve_is_dqo(struct gve_priv *priv) +{ + return priv->queue_format == GVE_DQO_RDA_FORMAT || + priv->queue_format == GVE_DQO_QPL_FORMAT; +} + static inline u32 gve_num_tx_queues(struct gve_priv *priv) { return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues; @@ -1248,6 +1254,9 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv, void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); bool gve_header_split_supported(const struct gve_priv *priv); +int gve_set_rx_buf_len_config(struct gve_priv *priv, u32 rx_buf_len, + struct netlink_ext_ack *extack, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); /* rx buffer handling */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index db6fc855a511d..52500ae8348e6 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -529,6 +529,8 @@ static void gve_get_ringparam(struct net_device *netdev, cmd->rx_pending = 
priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; + kernel_cmd->rx_buf_len = priv->rx_cfg.packet_buffer_size; + if (!gve_header_split_supported(priv)) kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; else if (priv->header_split_enabled) @@ -589,6 +591,12 @@ static int gve_set_ringparam(struct net_device *netdev, int err; gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + err = gve_set_rx_buf_len_config(priv, kernel_cmd->rx_buf_len, extack, + &rx_alloc_cfg); + if (err) + return err; + err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split, &rx_alloc_cfg); if (err) @@ -605,6 +613,8 @@ static int gve_set_ringparam(struct net_device *netdev, return err; } else { /* Set ring params for the next up */ + priv->rx_cfg.packet_buffer_size = + rx_alloc_cfg.packet_buffer_size; priv->header_split_enabled = rx_alloc_cfg.enable_header_split; priv->tx_desc_cnt = tx_alloc_cfg.ring_size; priv->rx_desc_cnt = rx_alloc_cfg.ring_size; @@ -944,7 +954,8 @@ static int gve_get_ts_info(struct net_device *netdev, const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, - .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_RX_BUF_LEN, .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, .get_sset_count = gve_get_sset_count, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 616182f4fb81b..6fb8fbb38a7d3 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1725,6 +1725,13 @@ static int gve_verify_xdp_configuration(struct net_device *dev, return -EOPNOTSUPP; } + if (priv->rx_cfg.packet_buffer_size != SZ_2K) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "XDP is not supported for Rx buf len %d, only %d supported.", + priv->rx_cfg.packet_buffer_size, SZ_2K); + return -EOPNOTSUPP; + } + max_xdp_mtu = 
priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); if (priv->queue_format == GVE_GQI_QPL_FORMAT) max_xdp_mtu -= GVE_RX_PAD; @@ -2056,6 +2063,38 @@ bool gve_header_split_supported(const struct gve_priv *priv) priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; } +int gve_set_rx_buf_len_config(struct gve_priv *priv, u32 rx_buf_len, + struct netlink_ext_ack *extack, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + u32 old_rx_buf_len = rx_alloc_cfg->packet_buffer_size; + + if (rx_buf_len == old_rx_buf_len) + return 0; + + /* device options may not always contain support for 4K buffers */ + if (!gve_is_dqo(priv) || priv->max_rx_buffer_size < SZ_4K) { + NL_SET_ERR_MSG_MOD(extack, + "Modifying Rx buf len is not supported"); + return -EOPNOTSUPP; + } + + if (priv->xdp_prog && rx_buf_len != SZ_2K) { + NL_SET_ERR_MSG_MOD(extack, + "Rx buf len can only be 2048 when XDP is on"); + return -EINVAL; + } + + if (rx_buf_len != SZ_2K && rx_buf_len != SZ_4K) { + NL_SET_ERR_MSG_MOD(extack, + "Rx buf len can only be 2048 or 4096"); + return -EINVAL; + } + rx_alloc_cfg->packet_buffer_size = rx_buf_len; + + return 0; +} + int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { From 09a81a0f4fb7346f80a7340f725aedccff8ef8e4 Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Thu, 6 Nov 2025 11:27:46 -0800 Subject: [PATCH 730/867] gve: Default to max_rx_buffer_size for DQO if device supported Change the driver's default behavior to prefer the largest available RX buffer length supported by the device for DQO format, rather than always using the hardcoded 2K default. Previously, the driver would initialize with `GVE_DEFAULT_RX_BUFFER_SIZE` (2K), even if the device advertised support for a larger length (e.g., 4K). Performance observations: - With LRO disabled, we observed >10% improvement in RX single stream throughput when MTU >=2048. 
- With LRO enabled, we observed >10% improvement in RX single stream throughput when MTU >=1460. - No regressions were observed. Signed-off-by: Ankit Garg Reviewed-by: Harshitha Ramamurthy Reviewed-by: Jordan Rhee Reviewed-by: Willem de Bruijn Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251106192746.243525-5-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 4f33d094a2ef4..b72cc0fa2ba2b 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -987,6 +987,10 @@ static void gve_enable_supported_features(struct gve_priv *priv, dev_info(&priv->pdev->dev, "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", priv->max_rx_buffer_size, priv->header_buf_size); + if (gve_is_dqo(priv) && + priv->max_rx_buffer_size > GVE_DEFAULT_RX_BUFFER_SIZE) + priv->rx_cfg.packet_buffer_size = + priv->max_rx_buffer_size; } /* Read and store ring size ranges given by device */ From 2554559aba883803475e4ca4fae22eaad6d33d86 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 6 Nov 2025 19:02:52 +0100 Subject: [PATCH 731/867] bonding: fix mii_status when slave is down netif_carrier_ok() doesn't check if the slave is up. Before the below commit, netif_running() was also checked. 
Fixes: 23a6037ce76c ("bonding: Remove support for use_carrier") Signed-off-by: Nicolas Dichtel Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20251106180252.3974772-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e95e593cd12d7..5abef8a3b7758 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2120,7 +2120,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* check for initial state */ new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { - if (netif_carrier_ok(slave_dev)) { + if (netif_running(slave_dev) && netif_carrier_ok(slave_dev)) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, BOND_LINK_BACK, @@ -2665,7 +2665,8 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { bond_propose_link_state(slave, BOND_LINK_NOCHANGE); - link_state = netif_carrier_ok(slave->dev); + link_state = netif_running(slave->dev) && + netif_carrier_ok(slave->dev); switch (slave->link) { case BOND_LINK_UP: From ec33f2e5a2d0dbbfd71435209aee812fdc9369b8 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 7 Nov 2025 10:40:29 +0800 Subject: [PATCH 732/867] net/smc: fix mismatch between CLC header and proposal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current CLC proposal message construction uses a mix of `ini->smc_type_v1/v2` and `pclc_base->hdr.typev1/v2` to decide whether to include optional extensions (IPv6 prefix extension for v1, and v2 extension). This leads to a critical inconsistency: when `smc_clc_prfx_set()` fails - for example, in IPv6-only environments with only link-local addresses, or when the local IP address and the outgoing interface’s network address are not in the same subnet. 
As a result, the proposal message is assembled using the stale `ini->smc_type_v1` value—causing the IPv6 prefix extension to be included even though the header indicates v1 is not supported. The peer then receives a malformed CLC proposal where the header type does not match the payload, and immediately resets the connection. The fix ensures consistency between the CLC header flags and the actual payload by synchronizing `ini->smc_type_v1` with `pclc_base->hdr.typev1` when prefix setup fails. Fixes: 8c3dca341aea ("net/smc: build and send V2 CLC proposal") Signed-off-by: D. Wythe Reviewed-by: Alexandra Winter Link: https://patch.msgid.link/20251107024029.88753-1-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_clc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 157aace169d49..87c87edadde71 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -890,6 +890,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return SMC_CLC_DECL_CNFERR; } pclc_base->hdr.typev1 = SMC_TYPE_N; + ini->smc_type_v1 = SMC_TYPE_N; } else { pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); plen += sizeof(*pclc_prfx) + From 3072f00bba764082fa41b3c3a2a7b013335353d2 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Thu, 6 Nov 2025 14:45:11 +0000 Subject: [PATCH 733/867] net/handshake: Fix memory leak in tls_handshake_accept() In tls_handshake_accept(), a netlink message is allocated using genlmsg_new(). In the error handling path, genlmsg_cancel() is called to cancel the message construction, but the message itself is not freed. This leads to a memory leak. Fix this by calling nlmsg_free() in the error path after genlmsg_cancel() to release the allocated memory. 
Fixes: 2fd5532044a89 ("net/handshake: Add a kernel API for requesting a TLSv1.3 handshake") Signed-off-by: Zilin Guan Reviewed-by: Chuck Lever Link: https://patch.msgid.link/20251106144511.3859535-1-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- net/handshake/tlshd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 081093dfd5533..8f9532a15f43f 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -259,6 +259,7 @@ static int tls_handshake_accept(struct handshake_req *req, out_cancel: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); out: return ret; } From 49b3916465176a5abcb29a0e464825f553d55d58 Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Thu, 6 Nov 2025 14:53:04 +0530 Subject: [PATCH 734/867] net: ethernet: ti: am65-cpsw-qos: fix IET verify/response timeout The CPSW module uses the MAC_VERIFY_CNT bit field in the CPSW_PN_IET_VERIFY_REG_k register to set the verify/response timeout count. This register specifies the number of clock cycles to wait before resending a verify packet if the verification fails. The verify/response timeout count, as being set by the function am65_cpsw_iet_set_verify_timeout_count() is hardcoded for 125MHz clock frequency, which varies based on PHY mode and link speed. The respective clock frequencies are as follows: - RGMII mode: * 1000 Mbps: 125 MHz * 100 Mbps: 25 MHz * 10 Mbps: 2.5 MHz - QSGMII/SGMII mode: 125 MHz (all speeds) Fix this by adding logic to calculate the correct timeout counts based on the actual PHY interface mode and link speed. 
Fixes: 49a2eb9068246 ("net: ethernet: ti: am65-cpsw-qos: Add Frame Preemption MAC Merge support") Signed-off-by: Aksh Garg Link: https://patch.msgid.link/20251106092305.1437347-2-a-garg7@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index fa96db7c1a130..ad06942ce461a 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -276,9 +276,31 @@ static int am65_cpsw_iet_set_verify_timeout_count(struct am65_cpsw_port *port) /* The number of wireside clocks contained in the verify * timeout counter. The default is 0x1312d0 * (10ms at 125Mhz in 1G mode). + * The frequency of the clock depends on the link speed + * and the PHY interface. */ - val = 125 * HZ_PER_MHZ; /* assuming 125MHz wireside clock */ + switch (port->slave.phy_if) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + if (port->qos.link_speed == SPEED_1000) + val = 125 * HZ_PER_MHZ; /* 125 MHz at 1000Mbps*/ + else if (port->qos.link_speed == SPEED_100) + val = 25 * HZ_PER_MHZ; /* 25 MHz at 100Mbps*/ + else + val = (25 * HZ_PER_MHZ) / 10; /* 2.5 MHz at 10Mbps*/ + break; + + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_SGMII: + val = 125 * HZ_PER_MHZ; /* 125 MHz */ + break; + default: + netdev_err(port->ndev, "selected mode does not supported IET\n"); + return -EOPNOTSUPP; + } val /= MILLIHZ_PER_HZ; /* count per ms timeout */ val *= verify_time_ms; /* count for timeout ms */ From d4b00d132d7cb70a74bc039c91c1d6120943c71b Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Thu, 6 Nov 2025 14:53:05 +0530 Subject: [PATCH 735/867] net: ethernet: ti: am65-cpsw-qos: fix IET verify retry mechanism The am65_cpsw_iet_verify_wait() function attempts verification 20 
times, toggling the AM65_CPSW_PN_IET_MAC_LINKFAIL bit in each iteration. When the LINKFAIL bit transitions from 1 to 0, the MAC merge layer initiates the verification process and waits for the timeout configured in MAC_VERIFY_CNT before automatically retransmitting. The MAC_VERIFY_CNT register is configured according to the user-defined verify/response timeout in am65_cpsw_iet_set_verify_timeout_count(). As per IEEE 802.3 Clause 99, the hardware performs this automatic retry up to 3 times. The current implementation toggles LINKFAIL after the user-configured verify/response timeout in each iteration, forcing the hardware to restart verification instead of respecting the MAC_VERIFY_CNT timeout. This bypasses the hardware's automatic retry mechanism. Fix this by moving the LINKFAIL bit toggle outside the retry loop and reducing the retry count from 20 to 3. The software now only monitors the status register while the hardware autonomously handles the 3 verification attempts at the proper MAC_VERIFY_CNT intervals. 
Fixes: 49a2eb9068246 ("net: ethernet: ti: am65-cpsw-qos: Add Frame Preemption MAC Merge support") Signed-off-by: Aksh Garg Link: https://patch.msgid.link/20251106092305.1437347-3-a-garg7@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index ad06942ce461a..66e8b224827b6 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -317,20 +317,21 @@ static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) u32 ctrl, status; int try; - try = 20; - do { - /* Reset the verify state machine by writing 1 - * to LINKFAIL - */ - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + try = 3; - /* Clear MAC_LINKFAIL bit to start Verify. */ - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + /* Reset the verify state machine by writing 1 + * to LINKFAIL + */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + /* Clear MAC_LINKFAIL bit to start Verify. 
*/ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + + do { msleep(port->qos.iet.verify_time_ms); status = readl(port->port_base + AM65_CPSW_PN_REG_IET_STATUS); @@ -352,7 +353,7 @@ static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) netdev_dbg(port->ndev, "MAC Merge verify error\n"); return -ENODEV; } - } while (try-- > 0); + } while (--try > 0); netdev_dbg(port->ndev, "MAC Merge verify timeout\n"); return -ETIMEDOUT; From e483a615a609d558d7ca8c161f6aedfb39226e7b Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 14:44:52 +0100 Subject: [PATCH 736/867] isdn: kcapi: add WQ_PERCPU to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. 
With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107134452.198378-1-marco.crivellari@suse.com Signed-off-by: Jakub Kicinski --- drivers/isdn/capi/kcapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index c5d13bdc239be..e8f7e52354bc2 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -907,7 +907,7 @@ int __init kcapi_init(void) { int err; - kcapi_wq = alloc_workqueue("kcapi", 0, 0); + kcapi_wq = alloc_workqueue("kcapi", WQ_PERCPU, 0); if (!kcapi_wq) return -ENOMEM; From 0725e6afb55128be21a2ca36e9674f573ccec173 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Nov 2025 06:40:25 +0000 Subject: [PATCH 737/867] tipc: Fix use-after-free in tipc_mon_reinit_self(). syzbot reported use-after-free of tipc_net(net)->monitors[] in tipc_mon_reinit_self(). [0] The array is protected by RTNL, but tipc_mon_reinit_self() iterates over it without RTNL. tipc_mon_reinit_self() is called from tipc_net_finalize(), which is always under RTNL except for tipc_net_finalize_work(). Let's hold RTNL in tipc_net_finalize_work(). 
[0]: BUG: KASAN: slab-use-after-free in __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] BUG: KASAN: slab-use-after-free in _raw_spin_lock_irqsave+0xa7/0xf0 kernel/locking/spinlock.c:162 Read of size 1 at addr ffff88805eae1030 by task kworker/0:7/5989 CPU: 0 UID: 0 PID: 5989 Comm: kworker/0:7 Not tainted syzkaller #0 PREEMPT_{RT,(full)} Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 Workqueue: events tipc_net_finalize_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x240 mm/kasan/report.c:482 kasan_report+0x118/0x150 mm/kasan/report.c:595 __kasan_check_byte+0x2a/0x40 mm/kasan/common.c:568 kasan_check_byte include/linux/kasan.h:399 [inline] lock_acquire+0x8d/0x360 kernel/locking/lockdep.c:5842 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xa7/0xf0 kernel/locking/spinlock.c:162 rtlock_slowlock kernel/locking/rtmutex.c:1894 [inline] rwbase_rtmutex_lock_state kernel/locking/spinlock_rt.c:160 [inline] rwbase_write_lock+0xd3/0x7e0 kernel/locking/rwbase_rt.c:244 rt_write_lock+0x76/0x110 kernel/locking/spinlock_rt.c:243 write_lock_bh include/linux/rwlock_rt.h:99 [inline] tipc_mon_reinit_self+0x79/0x430 net/tipc/monitor.c:718 tipc_net_finalize+0x115/0x190 net/tipc/net.c:140 process_one_work kernel/workqueue.c:3236 [inline] process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3319 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3400 kthread+0x70e/0x8a0 kernel/kthread.c:463 ret_from_fork+0x439/0x7d0 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Allocated by task 6089: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:388 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:405 kasan_kmalloc include/linux/kasan.h:260 [inline] 
__kmalloc_cache_noprof+0x1a8/0x320 mm/slub.c:4407 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] tipc_mon_create+0xc3/0x4d0 net/tipc/monitor.c:657 tipc_enable_bearer net/tipc/bearer.c:357 [inline] __tipc_nl_bearer_enable+0xe16/0x13f0 net/tipc/bearer.c:1047 __tipc_nl_compat_doit net/tipc/netlink_compat.c:371 [inline] tipc_nl_compat_doit+0x3bc/0x5f0 net/tipc/netlink_compat.c:393 tipc_nl_compat_handle net/tipc/netlink_compat.c:-1 [inline] tipc_nl_compat_recv+0x83c/0xbe0 net/tipc/netlink_compat.c:1321 genl_family_rcv_msg_doit+0x215/0x300 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x60e/0x790 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2552 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline] netlink_unicast+0x846/0xa10 net/netlink/af_netlink.c:1346 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x21c/0x270 net/socket.c:729 ____sys_sendmsg+0x508/0x820 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x1a1/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 6088: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:243 [inline] __kasan_slab_free+0x5b/0x80 mm/kasan/common.c:275 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2422 [inline] slab_free mm/slub.c:4695 [inline] kfree+0x195/0x550 mm/slub.c:4894 tipc_l2_device_event+0x380/0x650 
net/tipc/bearer.c:-1 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] unregister_netdevice_many_notify+0x14d7/0x1fe0 net/core/dev.c:12166 unregister_netdevice_many net/core/dev.c:12229 [inline] unregister_netdevice_queue+0x33c/0x380 net/core/dev.c:12073 unregister_netdevice include/linux/netdevice.h:3385 [inline] __tun_detach+0xe4d/0x1620 drivers/net/tun.c:621 tun_detach drivers/net/tun.c:637 [inline] tun_chr_close+0x10d/0x1c0 drivers/net/tun.c:3433 __fput+0x458/0xa80 fs/file_table.c:468 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 46cb01eeeb86 ("tipc: update mon's self addr when node addr generated") Reported-by: syzbot+d7dad7fd4b3921104957@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/690c323a.050a0220.baf87.007f.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107064038.2361188-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/tipc/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/net.c b/net/tipc/net.c index 0e95572e56b41..7e65d0b0c4a8d 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -145,7 +145,9 @@ void tipc_net_finalize_work(struct work_struct *work) { struct tipc_net *tn = container_of(work, struct tipc_net, work); + rtnl_lock(); tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); + rtnl_unlock(); } void tipc_net_stop(struct net *net) From e6ca8f533ed41129fcf052297718f417f021cc7d Mon 
Sep 17 00:00:00 2001 From: Buday Csaba Date: Sat, 8 Nov 2025 07:49:22 +0100 Subject: [PATCH 738/867] net: mdio: fix resource leak in mdiobus_register_device() Fix a possible leak in mdiobus_register_device() when both a reset-gpio and a reset-controller are present. Clean up the already claimed reset-gpio, when the registration of the reset-controller fails, so when an error code is returned, the device retains its state before the registration attempt. Link: https://lore.kernel.org/all/20251106144603.39053c81@kernel.org/ Fixes: 71dd6c0dff51 ("net: phy: add support for reset-controller") Signed-off-by: Buday Csaba Link: https://patch.msgid.link/4b419377f8dd7d2f63f919d0f74a336c734f8fff.1762584481.git.buday.csaba@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index cad6ed3aa10b6..4354241137d50 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -73,8 +73,11 @@ int mdiobus_register_device(struct mdio_device *mdiodev) return err; err = mdiobus_register_reset(mdiodev); - if (err) + if (err) { + gpiod_put(mdiodev->reset_gpio); + mdiodev->reset_gpio = NULL; return err; + } /* Assert the reset signal */ mdio_device_reset(mdiodev, 1); From b981e100c19dcd91ce8cca8562c3cdabd4fcf28c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 8 Nov 2025 22:59:51 +0100 Subject: [PATCH 739/867] net: dsa: loop: use new helper fixed_phy_register_100fd to simplify the code Use new helper fixed_phy_register_100fd to simplify the code. 
Signed-off-by: Heiner Kallweit Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/922f1b45-1748-4dd2-87eb-9d018df44731@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/dsa_loop.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 650d93226d9fa..4a416f2717bae 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -441,11 +441,6 @@ static int __init dsa_loop_create_switch_mdiodev(void) static int __init dsa_loop_init(void) { - struct fixed_phy_status status = { - .link = 1, - .speed = SPEED_100, - .duplex = DUPLEX_FULL, - }; unsigned int i; int ret; @@ -454,7 +449,7 @@ static int __init dsa_loop_init(void) return ret; for (i = 0; i < NUM_FIXED_PHYS; i++) - phydevs[i] = fixed_phy_register(&status, NULL); + phydevs[i] = fixed_phy_register_100fd(); ret = mdio_driver_register(&dsa_loop_drv); if (ret) { From 49c8d2c1f94cc2f4d1a108530d7ba52614b874c2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:37 -0800 Subject: [PATCH 740/867] net: netpoll: fix incorrect refcount handling causing incorrect cleanup commit efa95b01da18 ("netpoll: fix use after free") incorrectly ignored the refcount and prematurely set dev->npinfo to NULL during netpoll cleanup, leading to improper behavior and memory leaks. Scenario causing lack of proper cleanup: 1) A netpoll is associated with a NIC (e.g., eth0) and netdev->npinfo is allocated, and refcnt = 1 - Keep in mind that npinfo is shared among all netpoll instances. In this case, there is just one. 2) Another netpoll is also associated with the same NIC and npinfo->refcnt += 1. - Now dev->npinfo->refcnt = 2; - There is just one npinfo associated with the netdev. 3) When the first netpoll goes to clean up: - The first cleanup succeeds and clears np->dev->npinfo, ignoring refcnt. 
- It basically calls `RCU_INIT_POINTER(np->dev->npinfo, NULL);` - Set dev->npinfo = NULL, without proper cleanup - ->ndo_netpoll_cleanup() is not called either 4) Now the second target tries to clean up - The second cleanup fails because np->dev->npinfo is already NULL. * In this case, ops->ndo_netpoll_cleanup() was never called, and the skb pool is not cleaned up either (for the second netpoll instance) - This leaks npinfo and skbpool skbs, which is clearly reported by kmemleak. Revert commit efa95b01da18 ("netpoll: fix use after free") and add clarifying comments emphasizing that npinfo cleanup should only happen once the refcount reaches zero, ensuring stable and correct netpoll behavior. Cc: # 3.17.x Cc: Jay Vosburgh Fixes: efa95b01da18 ("netpoll: fix use after free") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-1-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c85f740065fc6..331764845e8fa 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -811,6 +811,10 @@ static void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; + /* At this point, there is a single npinfo instance per netdevice, and + * its refcnt tracks how many netpoll structures are linked to it. We + * only perform npinfo cleanup when the refcnt decrements to zero. 
+ */ if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -820,8 +824,7 @@ static void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); - } else - RCU_INIT_POINTER(np->dev->npinfo, NULL); + } skb_pool_flush(np); } From 39acc6a95eefcf814efa226d8813f89e7e03496e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:38 -0800 Subject: [PATCH 741/867] selftest: netcons: refactor target creation Extract the netconsole target creation from create_dynamic_target(), by moving it from create_dynamic_target() into a new helper function. This enables other tests to use the creation of netconsole targets with arbitrary parameters and no sleep. The new helper will be utilized by forthcoming torture-type selftests that require dynamic target management. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-2-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- .../drivers/net/lib/sh/lib_netcons.sh | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 8e1085e896472..9b5ef8074440c 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -113,31 +113,39 @@ function set_network() { configure_ip } -function create_dynamic_target() { - local FORMAT=${1:-"extended"} +function _create_dynamic_target() { + local FORMAT="${1:?FORMAT parameter required}" + local NCPATH="${2:?NCPATH parameter required}" DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') # Create a dynamic target - mkdir "${NETCONS_PATH}" + mkdir "${NCPATH}" - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo 
"${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + echo "${DSTIP}" > "${NCPATH}"/remote_ip + echo "${SRCIP}" > "${NCPATH}"/local_ip + echo "${DSTMAC}" > "${NCPATH}"/remote_mac + echo "${SRCIF}" > "${NCPATH}"/dev_name if [ "${FORMAT}" == "basic" ] then # Basic target does not support release - echo 0 > "${NETCONS_PATH}"/release - echo 0 > "${NETCONS_PATH}"/extended + echo 0 > "${NCPATH}"/release + echo 0 > "${NCPATH}"/extended elif [ "${FORMAT}" == "extended" ] then - echo 1 > "${NETCONS_PATH}"/extended + echo 1 > "${NCPATH}"/extended fi - echo 1 > "${NETCONS_PATH}"/enabled + echo 1 > "${NCPATH}"/enabled + +} + +function create_dynamic_target() { + local FORMAT=${1:-"extended"} + local NCPATH=${2:-"$NETCONS_PATH"} + _create_dynamic_target "${FORMAT}" "${NCPATH}" # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message From 6701896eb90998ff16338f199144bd9deefb79ba Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:39 -0800 Subject: [PATCH 742/867] selftest: netcons: create a torture test Create a netconsole test that puts a lot of pressure on the netconsole list manipulation. Do it by creating dynamic targets and deleting targets while messages are being sent. Also put interface down while the messages are being sent, as creating parallel targets. The code launches three background jobs on distinct schedules: * Toggle netcons target every 30 iterations * create and delete random_target every 50 iterations * toggle iface every 70 iterations This creates multiple concurrency sources that interact with netconsole states. This is good practice to simulate stress, and exercise netpoll and netconsole locks. 
This test already found an issue as reported in [1] Link: https://lore.kernel.org/all/20250901-netpoll_memleak-v1-1-34a181977dfc@debian.org/ [1] Signed-off-by: Breno Leitao Reviewed-by: Andre Carvalho Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-3-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/netcons_torture.sh | 130 ++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netcons_torture.sh diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 6e41635bd55a4..71ee69e524d77 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -18,6 +18,7 @@ TEST_PROGS := \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netcons_torture.sh \ netpoll_basic.py \ ping.py \ psp.py \ diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh new file mode 100755 index 0000000000000..2ce9ee3719d1a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_torture.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Repeatedly send kernel messages, toggles netconsole targets on and off, +# creates and deletes targets in parallel, and toggles the source interface to +# simulate stress conditions. +# +# This test aims to verify the robustness of netconsole under dynamic +# configurations and concurrent operations. +# +# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make +# sure no issues is reported. 
+# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Number of times the main loop run +ITERATIONS=${1:-150} + +# Only test extended format +FORMAT="extended" +# And ipv6 only +IP_VERSION="ipv6" + +# Create, enable and delete some targets. +create_and_delete_random_target() { + COUNT=2 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) + + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then + echo "Function didn't finish yet, skipping it." >&2 + return + fi + + # enable COUNT targets + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + + # Basic population so the target can come up + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" + done + + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg + # disable them all + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] + then + echo 0 > "${RND_TARGET_PATH}"/enabled + fi + rmdir "${RND_TARGET_PATH}" + done +} + +# Disable and enable the target mid-air, while messages +# are being transmitted. +toggle_netcons_target() { + for i in $(seq 2) + do + if [ ! 
-d "${NETCONS_PATH}" ] + then + break + fi + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + # Try to enable a bit harder, given it might fail to enable + # Write to `enabled` might fail depending on the lock, which is + # highly contentious here + for _ in $(seq 5) + do + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + done + done +} + +toggle_iface(){ + ip link set "${SRCIF}" down + ip link set "${SRCIF}" up +} + +# Start here + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network "${IP_VERSION}" +# Create a dynamic target for netconsole +create_dynamic_target "${FORMAT}" + +for i in $(seq "$ITERATIONS") +do + for _ in $(seq 10) + do + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg + done + wait + + if (( i % 30 == 0 )); then + toggle_netcons_target & + fi + + if (( i % 50 == 0 )); then + # create some targets, enable them, send msg and disable + # all in a parallel thread + create_and_delete_random_target & + fi + + if (( i % 70 == 0 )); then + toggle_iface & + fi +done +wait + +exit "${EXIT_STATUS}" From 236682db3b6fe71cad76ac5e920ea4c14a33178e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:40 -0800 Subject: [PATCH 743/867] selftest: netcons: add test for netconsole over bonded interfaces This patch adds a selftest that verifies netconsole functionality over bonded network interfaces using netdevsim. It sets up two bonded interfaces acting as transmit (TX) and receive (RX) ends, placed in separate network namespaces. 
The test sends kernel log messages and verifies that they are properly received on the bonded RX interfaces with both IPv4 and IPv6, and using basic and extended netconsole formats. This patchset aims to test a long-standing netpoll subsystem where netpoll has multiple users (in this case, netconsole and bonding). A similar selftest has been discussed in [1] and [2]. This test also tries to enable bonding and netpoll in different orders, just to guarantee that all the possibilities are exercised. Link: https://lore.kernel.org/all/20250905-netconsole_torture-v3-0-875c7febd316@debian.org/ [1] Link: https://lore.kernel.org/lkml/96b940137a50e5c387687bb4f57de8b0435a653f.1404857349.git.decot@googlers.com/ [2] Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-4-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/bonding/Makefile | 2 + .../selftests/drivers/net/bonding/config | 4 + .../net/bonding/netcons_over_bonding.sh | 361 ++++++++++++++++++ .../drivers/net/lib/sh/lib_netcons.sh | 54 ++- 4 files changed, 414 insertions(+), 7 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 402d4ee84f2e8..6c5c60adb5e85 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -14,6 +14,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ + netcons_over_bonding.sh \ # end of TEST_PROGS TEST_FILES := \ @@ -24,6 +25,7 @@ TEST_FILES := \ TEST_INCLUDES := \ ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ ../../../net/forwarding/lib.sh \ # end of TEST_INCLUDES diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 
6bb290abd48bf..9914943762234 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,5 +1,6 @@ CONFIG_BONDING=y CONFIG_BRIDGE=y +CONFIG_CONFIGFS_FS=y CONFIG_DUMMY=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y @@ -9,6 +10,9 @@ CONFIG_MACVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh new file mode 100755 index 0000000000000..477cc9379500a --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# This selftest exercises trying to have multiple netpoll users at the same +# time. +# +# This selftest has multiple smalls test inside, and the goal is to +# get interfaces with bonding and netconsole in different orders in order +# to catch any possible issue. +# +# The main test composes of four interfaces being created using netdevsim; two +# of them are bonded to serve as the netconsole's transmit interface. The +# remaining two interfaces are similarly bonded and assigned to a separate +# network namespace, which acts as the receive interface, where socat monitors +# for incoming messages. +# +# A netconsole message is then sent to ensure it is properly received across +# this configuration. +# +# Later, run a few other tests, to make sure that bonding and netconsole +# cannot coexist. +# +# The test's objective is to exercise netpoll usage when managed simultaneously +# by multiple subsystems (netconsole and bonding). 
+# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true +modprobe bonding 2> /dev/null || true +modprobe veth 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup_bond EXIT + +FORMAT="extended" +IP_VERSION="ipv4" +VETH0="veth"$(( RANDOM % 256)) +VETH1="veth"$((256 + RANDOM % 256)) +TXNS="" +RXNS="" + +# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with +# the bonding interfaces +function setup_bonding_ifaces() { + local RAND=$(( RANDOM % 100 )) + BOND_TX_MAIN_IF="bond_tx_$RAND" + BOND_RX_MAIN_IF="bond_rx_$RAND" + + # Setup TX + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + then + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 + # only clean nsim ifaces and namespace. Nothing else has been + # initialized + cleanup_bond_nsim + trap - EXIT + exit "${ksft_skip}" + fi + + # create_netdevsim() got the interface up, but it needs to be down + # before being enslaved. 
+ ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # Setup RX + ip -n "${RXNS}" \ + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX_MAIN_IF}" up + + export DSTIF="${BOND_RX_MAIN_IF}" + export SRCIF="${BOND_TX_MAIN_IF}" +} + +# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface +# and the other two will be bond to the RX interface (on the other namespace) +function create_ifaces_bond() { + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") +} + +# netdevsim link BOND_TX to BOND_RX interfaces +function link_ifaces_bond() { + local BOND_TX1_SLAVE_IFIDX + local BOND_TX2_SLAVE_IFIDX + local BOND_RX1_SLAVE_IFIDX + local BOND_RX2_SLAVE_IFIDX + local TXNS_FD + local RXNS_FD + + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) + + exec {TXNS_FD} "$NSIM_DEV_SYS_LINK" 
+ echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + + exec {TXNS_FD}<&- + exec {RXNS_FD}<&- +} + +function create_all_ifaces() { + # setup_ns function is coming from lib.sh + setup_ns TXNS RXNS + export NAMESPACE="${RXNS}" + + # Create two interfaces for RX and two for TX + create_ifaces_bond + # Link netlink ifaces + link_ifaces_bond +} + +# configure DSTIF and SRCIF IPs +function configure_ifaces_ips() { + local IP_VERSION=${1:-"ipv4"} + select_ipv4_or_ipv6 "${IP_VERSION}" + + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" + ip -n "${RXNS}" link set "${DSTIF}" up + + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" + ip -n "${TXNS}" link set "${SRCIF}" up +} + +function test_enable_netpoll_on_enslaved_iface() { + echo 0 > "${NETCONS_PATH}"/enabled + + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and + # linked to BOND_RX1_SLAVE_IF inside the namespace. + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # This should fail with the following message in dmesg: + # netpoll: netconsole: ethX is a slave device, aborting + set +e + enable_netcons_ns 2> /dev/null + set -e + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Bonding and netpoll cannot co-exists." >&2 + exit "${ksft_fail}" + fi +} + +function test_delete_bond_and_reenable_target() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond + + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore + # netpoll can be plugged in there + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # this should work, since the interface is not enslaved + enable_netcons_ns + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Unable to start netpoll on an unbond iface." 
>&2 + exit "${ksft_fail}" + fi +} + +# Send a netconsole message to the netconsole target +function test_send_netcons_msg_through_bond_iface() { + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat +} + +# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. +# Given BOND_TX_MAIN_IF was deleted, recreate it first +function test_enslave_netcons_enabled_iface { + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 + exit "${ksft_fail}" + fi + + # recreate the bonding iface. it got deleted by previous + # test (test_delete_bond_and_reenable_target) + ip -n "${TXNS}" \ + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + + # sub-interface need to be down before attaching to bonding + # This will also disable netconsole. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 + exit "${ksft_fail}" + fi +} + +# Get netconsole enabled on a bonding interface and attach a second +# sub-interface. 
+function test_enslave_iface_to_bond { + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name + enable_netcons_ns + + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 + exit "${ksft_fail}" + fi +} + +function test_enslave_iff_disabled_netpoll_iface { + local ret + + # Create a veth pair inside TXNS, next to the bond. veth is known to have + # IFF_DISABLE_NETPOLL set + if ! ip -n "${TXNS}" link add "${VETH0}" type veth peer name "${VETH1}" + then + echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2 + exit "${ksft_skip}" + fi + set +e + # This will print RTNETLINK answers: Device or resource busy + ip -n "${TXNS}" link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null + ret=$? 
+ set -e + if [[ $ret -eq 0 ]] + then + echo "test failed: veth interface should not have been enslaved" >&2 + exit "${ksft_fail}" + fi +} + +# Given that netconsole picks the current net namespace, we need to enable it +# from inside the TXNS namespace +function enable_netcons_ns() { + ip netns exec "${TXNS}" sh -c \ + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" +} + +#################### +# Tests start here # +#################### + +# Create regular interfaces using netdevsim and link them +create_all_ifaces + +# Setup the bonding interfaces +# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF +# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF +setup_bonding_ifaces + +# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF +configure_ifaces_ips "${IP_VERSION}" + +_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" +enable_netcons_ns +set_user_data + +# Test #1 : Create a bonding interface and attach netpoll into +# the bonding interface. Netconsole/netpoll should work on +# the bonding interface. +test_send_netcons_msg_through_bond_iface +echo "test #1: netpoll on bonding interface worked. Test passed" >&2 + +# Test #2: Attach netpoll to an enslaved interface +# Try to attach netpoll to an enslaved sub-interface (while still being part of +# a bonding interface), which shouldn't be allowed +test_enable_netpoll_on_enslaved_iface +echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2 + +# Test #3: Unplug the sub-interface from bond and enable netconsole +# Detach the interface from a bonding interface and attach netpoll again +test_delete_bond_and_reenable_target +echo "test #3: Able to attach to an unbound interface. Test passed." >&2 + +# Test #4: Enslave a sub-interface that had netconsole enabled +# Try to enslave an interface that has netconsole/netpoll enabled. 
+# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it +test_enslave_netcons_enabled_iface +echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 + +# Test #5: Enslave a sub-interface to a bonding interface +# Enslave an interface to a bond interface that has netpoll attached +# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of +# it. Netconsole is currently disabled +test_enslave_iface_to_bond +echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 + +# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface +# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is +# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface +# and it should fail, with netpoll: veth0 doesn't support polling +test_enslave_iff_disabled_netpoll_iface +echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 + +cleanup_bond +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 9b5ef8074440c..87f89fd92f8c1 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later +SRCIP="" # to be populated later SRCIP4="192.0.2.1" SRCIP6="fc00::1" DSTIF="" # to be populated later +DSTIP="" # to be populated later DSTIP4="192.0.2.2" DSTIP6="fc00::2" @@ -28,17 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" -# IDs for netdevsim +# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test +# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the +# same time. 
NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_BOND_TX_1=$((768 + RANDOM % 256)) +NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) +NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) +NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" +NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" # Used to create and delete namespaces source "${LIBDIR}"/../../../../net/lib.sh # Create netdevsim interfaces create_ifaces() { - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" udevadm settle 2> /dev/null || true @@ -137,9 +145,6 @@ function _create_dynamic_target() { then echo 1 > "${NCPATH}"/extended fi - - echo 1 > "${NCPATH}"/enabled - } function create_dynamic_target() { @@ -147,6 +152,8 @@ function create_dynamic_target() { local NCPATH=${2:-"$NETCONS_PATH"} _create_dynamic_target "${FORMAT}" "${NCPATH}" + echo 1 > "${NCPATH}"/enabled + # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message # gets more organized/sequential as well. @@ -193,14 +200,26 @@ function do_cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } -function cleanup() { +function cleanup_netcons() { # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled + # do not fail if the target is already disabled + if [[ ! 
-d "${NETCONS_PATH}" ]] + then + # in some cases this is called before netcons path is created + return + fi + if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] + then + echo 0 > "${NETCONS_PATH}"/enabled || true + fi # Remove all the keys that got created during the selftest find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete # Remove the configfs entry rmdir "${NETCONS_PATH}" +} +function cleanup() { + cleanup_netcons do_cleanup } @@ -377,3 +396,24 @@ function wait_for_port() { # more frequently on IPv6 sleep 1 } + +# Clean up netdevsim ifaces created for bonding test +function cleanup_bond_nsim() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond || true + ip -n "${RXNS}" \ + link delete "${BOND_RX_MAIN_IF}" type bond || true + + cleanup_netdevsim "$NSIM_BOND_TX_1" + cleanup_netdevsim "$NSIM_BOND_TX_2" + cleanup_netdevsim "$NSIM_BOND_RX_1" + cleanup_netdevsim "$NSIM_BOND_RX_2" +} + +# cleanup tests that use bonding interfaces +function cleanup_bond() { + cleanup_netcons + cleanup_bond_nsim + cleanup_all_ns + ip link delete "${VETH0}" || true +} From 8da7bea7db692e786165b71729fb68b7ff65ee56 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 31 Oct 2025 18:33:28 +0800 Subject: [PATCH 744/867] xsk: add indirect call for xsk_destruct_skb Since Eric proposed an idea about adding indirect call wrappers for UDP and managed to see a huge improvement[1], the same situation can also be applied in xsk scenario. This patch adds an indirect call for xsk and helps current copy mode improve the performance by around 1% stably which was observed with IXGBE at 10Gb/sec loaded. If the throughput grows, the positive effect will be magnified. I applied this patch on top of batch xmit series[2], and was able to see <5% improvement from our internal application which is a little bit unstable though. Use INDIRECT wrappers to keep xsk_destruct_skb static as it used to be when the mitigation config is off. 
Be aware of the freeing path that can be very hot since the frequency can reach around 2,000,000 times per second with the xdpsock test. [1]: https://lore.kernel.org/netdev/20251006193103.2684156-2-edumazet@google.com/ [2]: https://lore.kernel.org/all/20251021131209.41491-1-kerneljasonxing@gmail.com/ Suggested-by: Alexander Lobakin Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251031103328.95468-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- include/net/xdp_sock.h | 7 +++++++ net/core/skbuff.c | 8 +++++--- net/xdp/xsk.c | 3 ++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index ce587a2256618..23e8861e8b25e 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -125,6 +125,7 @@ struct xsk_tx_metadata_ops { int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(struct list_head *flush_list); +INDIRECT_CALLABLE_DECLARE(void xsk_destruct_skb(struct sk_buff *)); /** * xsk_tx_metadata_to_compl - Save enough relevant metadata information @@ -218,6 +219,12 @@ static inline void __xsk_map_flush(struct list_head *flush_list) { } +#ifdef CONFIG_MITIGATION_RETPOLINE +static inline void xsk_destruct_skb(struct sk_buff *skb) +{ +} +#endif + static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, struct xsk_tx_metadata_compl *compl) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d95658b738d19..4f4d7ab7057f1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include @@ -1140,12 +1141,13 @@ void skb_release_head_state(struct sk_buff *skb) if (skb->destructor) { DEBUG_NET_WARN_ON_ONCE(in_hardirq()); #ifdef CONFIG_INET - INDIRECT_CALL_3(skb->destructor, + INDIRECT_CALL_4(skb->destructor, tcp_wfree, __sock_wfree, sock_wfree, + xsk_destruct_skb, skb); #else - 
INDIRECT_CALL_1(skb->destructor, - sock_wfree, + INDIRECT_CALL_2(skb->destructor, + sock_wfree, xsk_destruct_skb, skb); #endif diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ed8b612ec29d0..bcfd400e9cf8b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -602,7 +602,8 @@ static u32 xsk_get_num_desc(struct sk_buff *skb) return XSKCB(skb)->num_descs; } -static void xsk_destruct_skb(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE +void xsk_destruct_skb(struct sk_buff *skb) { struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; From b02d229013aad864d89c2cbe8c713ff7da04c253 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Nov 2025 08:22:24 -0800 Subject: [PATCH 745/867] tools: ynltool: create skeleton for the C command Based on past discussions it seems like integration of YNL into iproute2 is unlikely. YNL itself is not great as a C library, since it has no backward compat (we routinely change types). Most of the operations can be performed with the generic Python CLI directly. There is, however, a handful of operations where summarization of kernel output is very useful (mostly related to stats: page-pool, qstat). Create a command (inspired by bpftool, I think it stood the test of time reasonably well) to be able to plug the subcommands into. 
Link: https://lore.kernel.org/1754895902-8790-1-git-send-email-ernis@linux.microsoft.com Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20251107162227.980672-2-kuba@kernel.org Acked-by: Stanislav Fomichev Signed-off-by: Paolo Abeni --- tools/net/ynl/Makefile | 3 +- tools/net/ynl/ynltool/.gitignore | 1 + tools/net/ynl/ynltool/Makefile | 52 +++++ tools/net/ynl/ynltool/json_writer.c | 288 ++++++++++++++++++++++++++++ tools/net/ynl/ynltool/json_writer.h | 75 ++++++++ tools/net/ynl/ynltool/main.c | 240 +++++++++++++++++++++++ tools/net/ynl/ynltool/main.h | 62 ++++++ 7 files changed, 720 insertions(+), 1 deletion(-) create mode 100644 tools/net/ynl/ynltool/.gitignore create mode 100644 tools/net/ynl/ynltool/Makefile create mode 100644 tools/net/ynl/ynltool/json_writer.c create mode 100644 tools/net/ynl/ynltool/json_writer.h create mode 100644 tools/net/ynl/ynltool/main.c create mode 100644 tools/net/ynl/ynltool/main.h diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 211df5a93ad93..31ed20c0f3f8a 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -12,10 +12,11 @@ endif libdir ?= $(prefix)/$(libdir_relative) includedir ?= $(prefix)/include -SUBDIRS = lib generated samples +SUBDIRS = lib generated samples ynltool all: $(SUBDIRS) libynl.a +ynltool: | lib generated libynl.a samples: | lib generated libynl.a: | lib generated @echo -e "\tAR $@" diff --git a/tools/net/ynl/ynltool/.gitignore b/tools/net/ynl/ynltool/.gitignore new file mode 100644 index 0000000000000..f38848dbb0d30 --- /dev/null +++ b/tools/net/ynl/ynltool/.gitignore @@ -0,0 +1 @@ +ynltool diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile new file mode 100644 index 0000000000000..cfabab3a20da6 --- /dev/null +++ b/tools/net/ynl/ynltool/Makefile @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0-only + +include ../Makefile.deps + +INSTALL ?= install +prefix ?= /usr + +CC := gcc +CFLAGS := -Wall -Wextra -Werror -O2 +ifeq ("$(DEBUG)","1") + 
CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan +endif +CFLAGS += -I../lib + +SRC_VERSION := \ + $(shell make --no-print-directory -sC ../../../.. kernelversion || \ + echo "unknown") + +CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"' + +SRCS := $(wildcard *.c) +OBJS := $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) + +YNLTOOL := $(OUTPUT)ynltool + +include $(wildcard *.d) + +all: $(YNLTOOL) + +Q = @ + +$(YNLTOOL): $(OBJS) + $(Q)echo -e "\tLINK $@" + $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) + +%.o: %.c main.h json_writer.h + $(Q)echo -e "\tCC $@" + $(Q)$(COMPILE.c) -MMD -c -o $@ $< + +clean: + rm -f *.o *.d *~ + +distclean: clean + rm -f $(YNLTOOL) + +bindir ?= /usr/bin + +install: $(YNLTOOL) + install -m 0755 $(YNLTOOL) $(DESTDIR)$(bindir)/$(YNLTOOL) + +.PHONY: all clean distclean +.DEFAULT_GOAL=all diff --git a/tools/net/ynl/ynltool/json_writer.c b/tools/net/ynl/ynltool/json_writer.c new file mode 100644 index 0000000000000..c8685e592cd37 --- /dev/null +++ b/tools/net/ynl/ynltool/json_writer.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * Authors: Stephen Hemminger + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "json_writer.h" + +struct json_writer { + FILE *out; + unsigned depth; + bool pretty; + char sep; +}; + +static void jsonw_indent(json_writer_t *self) +{ + unsigned i; + for (i = 0; i < self->depth; ++i) + fputs(" ", self->out); +} + +static void jsonw_eol(json_writer_t *self) +{ + if (!self->pretty) + return; + + putc('\n', self->out); + jsonw_indent(self); +} + +static void jsonw_eor(json_writer_t *self) +{ + if (self->sep != '\0') + putc(self->sep, self->out); + self->sep = ','; +} + +static void jsonw_puts(json_writer_t *self, const char *str) +{ + putc('"', self->out); + for (; *str; ++str) + switch (*str) { + case '\t': + fputs("\\t", 
self->out); + break; + case '\n': + fputs("\\n", self->out); + break; + case '\r': + fputs("\\r", self->out); + break; + case '\f': + fputs("\\f", self->out); + break; + case '\b': + fputs("\\b", self->out); + break; + case '\\': + fputs("\\\\", self->out); + break; + case '"': + fputs("\\\"", self->out); + break; + default: + putc(*str, self->out); + } + putc('"', self->out); +} + +json_writer_t *jsonw_new(FILE *f) +{ + json_writer_t *self = malloc(sizeof(*self)); + if (self) { + self->out = f; + self->depth = 0; + self->pretty = false; + self->sep = '\0'; + } + return self; +} + +void jsonw_destroy(json_writer_t **self_p) +{ + json_writer_t *self = *self_p; + + assert(self->depth == 0); + fputs("\n", self->out); + fflush(self->out); + free(self); + *self_p = NULL; +} + +void jsonw_pretty(json_writer_t *self, bool on) +{ + self->pretty = on; +} + +void jsonw_reset(json_writer_t *self) +{ + assert(self->depth == 0); + self->sep = '\0'; +} + +static void jsonw_begin(json_writer_t *self, int c) +{ + jsonw_eor(self); + putc(c, self->out); + ++self->depth; + self->sep = '\0'; +} + +static void jsonw_end(json_writer_t *self, int c) +{ + assert(self->depth > 0); + + --self->depth; + if (self->sep != '\0') + jsonw_eol(self); + putc(c, self->out); + self->sep = ','; +} + +void jsonw_name(json_writer_t *self, const char *name) +{ + jsonw_eor(self); + jsonw_eol(self); + self->sep = '\0'; + jsonw_puts(self, name); + putc(':', self->out); + if (self->pretty) + putc(' ', self->out); +} + +void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) +{ + jsonw_eor(self); + putc('"', self->out); + vfprintf(self->out, fmt, ap); + putc('"', self->out); +} + +void jsonw_printf(json_writer_t *self, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + jsonw_eor(self); + vfprintf(self->out, fmt, ap); + va_end(ap); +} + +void jsonw_start_object(json_writer_t *self) +{ + jsonw_begin(self, '{'); +} + +void jsonw_end_object(json_writer_t *self) +{ + jsonw_end(self, '}'); +} + +void jsonw_start_array(json_writer_t *self) +{ + jsonw_begin(self, '['); +} + +void jsonw_end_array(json_writer_t *self) +{ + jsonw_end(self, ']'); +} + +void jsonw_string(json_writer_t *self, const char *value) +{ + jsonw_eor(self); + jsonw_puts(self, value); +} + +void jsonw_bool(json_writer_t *self, bool val) +{ + jsonw_printf(self, "%s", val ? "true" : "false"); +} + +void jsonw_null(json_writer_t *self) +{ + jsonw_printf(self, "null"); +} + +void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num) +{ + jsonw_printf(self, fmt, num); +} + +void jsonw_float(json_writer_t *self, double num) +{ + jsonw_printf(self, "%g", num); +} + +void jsonw_hu(json_writer_t *self, unsigned short num) +{ + jsonw_printf(self, "%hu", num); +} + +void jsonw_uint(json_writer_t *self, uint64_t num) +{ + jsonw_printf(self, "%"PRIu64, num); +} + +void jsonw_lluint(json_writer_t *self, unsigned long long int num) +{ + jsonw_printf(self, "%llu", num); +} + +void jsonw_int(json_writer_t *self, int64_t num) +{ + jsonw_printf(self, "%"PRId64, num); +} + +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val) +{ + jsonw_name(self, prop); + jsonw_string(self, val); +} + +void jsonw_bool_field(json_writer_t *self, const char *prop, bool val) +{ + jsonw_name(self, prop); + jsonw_bool(self, val); +} + +void jsonw_float_field(json_writer_t *self, const char *prop, double val) +{ + jsonw_name(self, prop); + jsonw_float(self, val); +} + +void jsonw_float_field_fmt(json_writer_t *self, + const char *prop, + const char *fmt, + double val) +{ + jsonw_name(self, prop); + jsonw_float_fmt(self, fmt, val); +} + +void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num) +{ + 
jsonw_name(self, prop); + jsonw_uint(self, num); +} + +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num) +{ + jsonw_name(self, prop); + jsonw_hu(self, num); +} + +void jsonw_lluint_field(json_writer_t *self, + const char *prop, + unsigned long long int num) +{ + jsonw_name(self, prop); + jsonw_lluint(self, num); +} + +void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num) +{ + jsonw_name(self, prop); + jsonw_int(self, num); +} + +void jsonw_null_field(json_writer_t *self, const char *prop) +{ + jsonw_name(self, prop); + jsonw_null(self); +} diff --git a/tools/net/ynl/ynltool/json_writer.h b/tools/net/ynl/ynltool/json_writer.h new file mode 100644 index 0000000000000..0f1e63c88f6ac --- /dev/null +++ b/tools/net/ynl/ynltool/json_writer.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * Authors: Stephen Hemminger + */ + +#ifndef _JSON_WRITER_H_ +#define _JSON_WRITER_H_ + +#include +#include +#include +#include + +/* Opaque class structure */ +typedef struct json_writer json_writer_t; + +/* Create a new JSON stream */ +json_writer_t *jsonw_new(FILE *f); +/* End output to JSON stream */ +void jsonw_destroy(json_writer_t **self_p); + +/* Cause output to have pretty whitespace */ +void jsonw_pretty(json_writer_t *self, bool on); + +/* Reset separator to create new JSON */ +void jsonw_reset(json_writer_t *self); + +/* Add property name */ +void jsonw_name(json_writer_t *self, const char *name); + +/* Add value */ +void __attribute__((format(printf, 2, 0))) jsonw_vprintf_enquote(json_writer_t *self, + const char *fmt, + va_list ap); +void __attribute__((format(printf, 2, 3))) jsonw_printf(json_writer_t *self, + const char *fmt, ...); +void jsonw_string(json_writer_t *self, const char *value); +void jsonw_bool(json_writer_t *self, bool value); +void 
jsonw_float(json_writer_t *self, double number); +void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num); +void jsonw_uint(json_writer_t *self, uint64_t number); +void jsonw_hu(json_writer_t *self, unsigned short number); +void jsonw_int(json_writer_t *self, int64_t number); +void jsonw_null(json_writer_t *self); +void jsonw_lluint(json_writer_t *self, unsigned long long int num); + +/* Useful Combinations of name and value */ +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val); +void jsonw_bool_field(json_writer_t *self, const char *prop, bool value); +void jsonw_float_field(json_writer_t *self, const char *prop, double num); +void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num); +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num); +void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num); +void jsonw_null_field(json_writer_t *self, const char *prop); +void jsonw_lluint_field(json_writer_t *self, const char *prop, + unsigned long long int num); +void jsonw_float_field_fmt(json_writer_t *self, const char *prop, + const char *fmt, double val); + +/* Collections */ +void jsonw_start_object(json_writer_t *self); +void jsonw_end_object(json_writer_t *self); + +void jsonw_start_array(json_writer_t *self); +void jsonw_end_array(json_writer_t *self); + +/* Override default exception handling */ +typedef void (jsonw_err_handler_fn)(const char *); + +#endif /* _JSON_WRITER_H_ */ diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c new file mode 100644 index 0000000000000..8e15e4ee543f1 --- /dev/null +++ b/tools/net/ynl/ynltool/main.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +/* Copyright Meta Platforms, Inc. 
and affiliates */ + +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" + +const char *bin_name; +static int last_argc; +static char **last_argv; +static int (*last_do_help)(int argc, char **argv); +json_writer_t *json_wtr; +bool pretty_output; +bool json_output; + +static void __attribute__((noreturn)) clean_and_exit(int i) +{ + if (json_output) + jsonw_destroy(&json_wtr); + + exit(i); +} + +void usage(void) +{ + last_do_help(last_argc - 1, last_argv + 1); + + clean_and_exit(-1); +} + +static int do_help(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n" + " %s version\n" + "\n" + " OBJECT := { }\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, bin_name); + + return 0; +} + +static int do_version(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "version"); + jsonw_string(json_wtr, SRC_VERSION); /* quoted string: a bare version token is not valid JSON */ + jsonw_end_object(json_wtr); + } else { + printf("%s " SRC_VERSION "\n", bin_name); + } + return 0; +} + +static const struct cmd commands[] = { + { "help", do_help }, + { "version", do_version }, + { 0 } +}; + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)) +{ + unsigned int i; + + last_argc = argc; + last_argv = argv; + last_do_help = help; + + if (argc < 1 && cmds[0].func) + return cmds[0].func(argc, argv); + + for (i = 0; cmds[i].cmd; i++) { + if (is_prefix(*argv, cmds[i].cmd)) { + if (!cmds[i].func) { + p_err("command '%s' is not available", cmds[i].cmd); + return -1; + } + return cmds[i].func(argc - 1, argv + 1); + } + } + + help(argc - 1, argv + 1); + + return -1; +} + +bool is_prefix(const char *pfx, const char *str) +{ + if (!pfx) + return false; + if (strlen(str) < strlen(pfx)) + return false; + + return 
!memcmp(str, pfx, strlen(pfx)); +} + +/* Last argument MUST be NULL pointer */ +int detect_common_prefix(const char *arg, ...) +{ + unsigned int count = 0; + const char *ref; + char msg[256]; + va_list ap; + + snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg); + va_start(ap, arg); + while ((ref = va_arg(ap, const char *))) { + if (!is_prefix(arg, ref)) + continue; + count++; + if (count > 1) + strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1); + strncat(msg, ref, sizeof(msg) - strlen(msg) - 1); + } + va_end(ap); + strncat(msg, "'", sizeof(msg) - strlen(msg) - 1); + + if (count >= 2) { + p_err("%s", msg); + return -1; + } + + return 0; +} + +void p_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "error"); + jsonw_vprintf_enquote(json_wtr, fmt, ap); + jsonw_end_object(json_wtr); + } else { + fprintf(stderr, "Error: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + va_end(ap); +} + +void p_info(const char *fmt, ...) 
+{ + va_list ap; + + if (json_output) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + +int main(int argc, char **argv) +{ + static const struct option options[] = { + { "json", no_argument, NULL, 'j' }, + { "help", no_argument, NULL, 'h' }, + { "pretty", no_argument, NULL, 'p' }, + { "version", no_argument, NULL, 'V' }, + { 0 } + }; + bool version_requested = false; + int opt, ret; + + setlinebuf(stdout); + + last_do_help = do_help; + pretty_output = false; + json_output = false; + bin_name = "ynltool"; + + opterr = 0; + while ((opt = getopt_long(argc, argv, "Vhjp", + options, NULL)) >= 0) { + switch (opt) { + case 'V': + version_requested = true; + break; + case 'h': + return do_help(argc, argv); + case 'p': + pretty_output = true; + /* fall through */ + case 'j': + if (!json_output) { + json_wtr = jsonw_new(stdout); + if (!json_wtr) { + p_err("failed to create JSON writer"); + return -1; + } + json_output = true; + } + jsonw_pretty(json_wtr, pretty_output); + break; + default: + p_err("unrecognized option '%s'", argv[optind - 1]); + if (json_output) + clean_and_exit(-1); + else + usage(); + } + } + + argc -= optind; + argv += optind; + if (argc < 0) + usage(); + + if (version_requested) + ret = do_version(argc, argv); + else + ret = cmd_select(commands, argc, argv, do_help); + + if (json_output) + jsonw_destroy(&json_wtr); + + return ret; +} diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h new file mode 100644 index 0000000000000..f4a70acf20858 --- /dev/null +++ b/tools/net/ynl/ynltool/main.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +/* Copyright Meta Platforms, Inc. 
and affiliates */ + +#ifndef __YNLTOOL_H +#define __YNLTOOL_H + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include + +#include "json_writer.h" + +#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) +#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) +#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; }) +#define GET_ARG() ({ argc--; *argv++; }) +#define REQ_ARGS(cnt) \ + ({ \ + int _cnt = (cnt); \ + bool _res; \ + \ + if (argc < _cnt) { \ + p_err("'%s' needs at least %d arguments, %d found", \ + argv[-1], _cnt, argc); \ + _res = false; \ + } else { \ + _res = true; \ + } \ + _res; \ + }) + +#define HELP_SPEC_OPTIONS \ + "OPTIONS := { {-j|--json} [{-p|--pretty}] }" + +extern const char *bin_name; + +extern json_writer_t *json_wtr; +extern bool json_output; +extern bool pretty_output; + +void __attribute__((format(printf, 1, 2))) p_err(const char *fmt, ...); +void __attribute__((format(printf, 1, 2))) p_info(const char *fmt, ...); + +bool is_prefix(const char *pfx, const char *str); +int detect_common_prefix(const char *arg, ...); +void usage(void) __attribute__((noreturn)); + +struct cmd { + const char *cmd; + int (*func)(int argc, char **argv); +}; + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)); + +#endif /* __YNLTOOL_H */ From 124dac9b421ca8e69ed11ad7a0fc1794d03c2519 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Nov 2025 08:22:25 -0800 Subject: [PATCH 746/867] tools: ynltool: add page-pool stats Replace the page-pool sample with page pool support in ynltool. 
# ynltool page-pool stats eth0[2] page pools: 18 (zombies: 0) refs: 171456 bytes: 702283776 (refs: 0 bytes: 0) recycling: 97.3% (alloc: 2679:6134966 recycle: 1250981:4719386) # ynltool -j page-pool stats | jq [ { "ifname": "eth0", "ifindex": 2, "page_pools": 18, "zombies": 0, "live": { "refs": 171456, "bytes": 702283776 }, "zombie": { "refs": 0, "bytes": 0 }, "recycling_pct": 97.2746, "alloc": { "slow": 2679, "fast": 6135029 }, "recycle": { "ring": 1250997, "cache": 4719432 } } ] # ynltool page-pool stats group-by pp pool id: 108 dev: eth0[2] napi: 530 inflight: 9472 pages 38797312 bytes recycling: 95.5% (alloc: 148:208379 recycle: 45386:153842) pool id: 107 dev: eth0[2] napi: 529 inflight: 9408 pages 38535168 bytes recycling: 94.9% (alloc: 147:180178 recycle: 42251:128808) Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20251107162227.980672-3-kuba@kernel.org Acked-by: Stanislav Fomichev Signed-off-by: Paolo Abeni --- tools/net/ynl/samples/page-pool.c | 149 ---------- tools/net/ynl/ynltool/Makefile | 11 +- tools/net/ynl/ynltool/main.c | 3 +- tools/net/ynl/ynltool/main.h | 3 + tools/net/ynl/ynltool/page-pool.c | 461 ++++++++++++++++++++++++++++++ 5 files changed, 473 insertions(+), 154 deletions(-) delete mode 100644 tools/net/ynl/samples/page-pool.c create mode 100644 tools/net/ynl/ynltool/page-pool.c diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c deleted file mode 100644 index e5d521320fbf3..0000000000000 --- a/tools/net/ynl/samples/page-pool.c +++ /dev/null @@ -1,149 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - -#include -#include - -#include - -#include - -#include "netdev-user.h" - -struct stat { - unsigned int ifc; - - struct { - unsigned int cnt; - size_t refs, bytes; - } live[2]; - - size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache; -}; - -struct stats_array { - unsigned int i, max; - struct stat *s; -}; - -static struct stat *find_ifc(struct stats_array *a, unsigned int 
ifindex) -{ - unsigned int i; - - for (i = 0; i < a->i; i++) { - if (a->s[i].ifc == ifindex) - return &a->s[i]; - } - - a->i++; - if (a->i == a->max) { - a->max *= 2; - a->s = reallocarray(a->s, a->max, sizeof(*a->s)); - } - a->s[i].ifc = ifindex; - return &a->s[i]; -} - -static void count(struct stat *s, unsigned int l, - struct netdev_page_pool_get_rsp *pp) -{ - s->live[l].cnt++; - if (pp->_present.inflight) - s->live[l].refs += pp->inflight; - if (pp->_present.inflight_mem) - s->live[l].bytes += pp->inflight_mem; -} - -int main(int argc, char **argv) -{ - struct netdev_page_pool_stats_get_list *pp_stats; - struct netdev_page_pool_get_list *pools; - struct stats_array a = {}; - struct ynl_error yerr; - struct ynl_sock *ys; - - ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; - } - - a.max = 128; - a.s = calloc(a.max, sizeof(*a.s)); - if (!a.s) - goto err_close; - - pools = netdev_page_pool_get_dump(ys); - if (!pools) - goto err_free; - - ynl_dump_foreach(pools, pp) { - struct stat *s = find_ifc(&a, pp->ifindex); - - count(s, 1, pp); - if (pp->_present.detach_time) - count(s, 0, pp); - } - netdev_page_pool_get_list_free(pools); - - pp_stats = netdev_page_pool_stats_get_dump(ys); - if (!pp_stats) - goto err_free; - - ynl_dump_foreach(pp_stats, pp) { - struct stat *s = find_ifc(&a, pp->info.ifindex); - - if (pp->_present.alloc_fast) - s->alloc_fast += pp->alloc_fast; - if (pp->_present.alloc_refill) - s->alloc_fast += pp->alloc_refill; - if (pp->_present.alloc_slow) - s->alloc_slow += pp->alloc_slow; - if (pp->_present.recycle_ring) - s->recycle_ring += pp->recycle_ring; - if (pp->_present.recycle_cached) - s->recycle_cache += pp->recycle_cached; - } - netdev_page_pool_stats_get_list_free(pp_stats); - - for (unsigned int i = 0; i < a.i; i++) { - char ifname[IF_NAMESIZE]; - struct stat *s = &a.s[i]; - const char *name; - double recycle; - - if (!s->ifc) { - name = "\t"; - } else { - name = 
if_indextoname(s->ifc, ifname); - if (name) - printf("%8s", name); - printf("[%u]\t", s->ifc); - } - - printf("page pools: %u (zombies: %u)\n", - s->live[1].cnt, s->live[0].cnt); - printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n", - s->live[1].refs, s->live[1].bytes, - s->live[0].refs, s->live[0].bytes); - - /* We don't know how many pages are sitting in cache and ring - * so we will under-count the recycling rate a bit. - */ - recycle = (double)(s->recycle_ring + s->recycle_cache) / - (s->alloc_fast + s->alloc_slow) * 100; - printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n", - recycle, s->alloc_slow, s->alloc_fast, - s->recycle_ring, s->recycle_cache); - } - - ynl_sock_destroy(ys); - return 0; - -err_free: - free(a.s); -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); - ynl_sock_destroy(ys); - return 2; -} diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile index cfabab3a20da6..11240740ed810 100644 --- a/tools/net/ynl/ynltool/Makefile +++ b/tools/net/ynl/ynltool/Makefile @@ -10,7 +10,7 @@ CFLAGS := -Wall -Wextra -Werror -O2 ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif -CFLAGS += -I../lib +CFLAGS += -I../lib -I../generated -I../../../include/uapi/ SRC_VERSION := \ $(shell make --no-print-directory -sC ../../../.. 
kernelversion || \ @@ -29,14 +29,17 @@ all: $(YNLTOOL) Q = @ -$(YNLTOOL): $(OBJS) +$(YNLTOOL): ../libynl.a $(OBJS) $(Q)echo -e "\tLINK $@" - $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) + $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lmnl -%.o: %.c main.h json_writer.h +%.o: %.c ../libynl.a $(Q)echo -e "\tCC $@" $(Q)$(COMPILE.c) -MMD -c -o $@ $< +../libynl.a: + $(Q)$(MAKE) -C ../ + clean: rm -f *.o *.d *~ diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c index 8e15e4ee543f1..f83c6f3245c8c 100644 --- a/tools/net/ynl/ynltool/main.c +++ b/tools/net/ynl/ynltool/main.c @@ -47,7 +47,7 @@ static int do_help(int argc __attribute__((unused)), "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n" " %s version\n" "\n" - " OBJECT := { }\n" + " OBJECT := { page-pool }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name); @@ -71,6 +71,7 @@ static int do_version(int argc __attribute__((unused)), static const struct cmd commands[] = { { "help", do_help }, + { "page-pool", do_page_pool }, { "version", do_version }, { 0 } }; diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h index f4a70acf20858..fd05d21451a21 100644 --- a/tools/net/ynl/ynltool/main.h +++ b/tools/net/ynl/ynltool/main.h @@ -59,4 +59,7 @@ struct cmd { int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)); +/* subcommands */ +int do_page_pool(int argc, char **argv); + #endif /* __YNLTOOL_H */ diff --git a/tools/net/ynl/ynltool/page-pool.c b/tools/net/ynl/ynltool/page-pool.c new file mode 100644 index 0000000000000..4b24492abab78 --- /dev/null +++ b/tools/net/ynl/ynltool/page-pool.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +#include +#include +#include +#include +#include + +#include +#include "netdev-user.h" + +#include "main.h" + +struct pp_stat { + unsigned int ifc; + + struct { + unsigned int cnt; + size_t refs, bytes; + } live[2]; + + size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache; +}; + 
+struct pp_stats_array { + unsigned int i, max; /* i: entries in use, max: allocated capacity of s[] */ + struct pp_stat *s; +}; + +static struct pp_stat *find_ifc(struct pp_stats_array *a, unsigned int ifindex) /* return the entry for ifindex, claiming the next free slot when none exists */ +{ + unsigned int i; + + for (i = 0; i < a->i; i++) { + if (a->s[i].ifc == ifindex) + return &a->s[i]; + } + + a->i++; + if (a->i == a->max) { /* grows as soon as the last slot is claimed, so s[i] below stays in bounds */ + a->max *= 2; + a->s = reallocarray(a->s, a->max, sizeof(*a->s)); /* NOTE(review): result is unchecked; a failure would leave a->s NULL for the store below */ + } + a->s[i] = (struct pp_stat){ .ifc = ifindex }; /* clear the whole slot: memory added by reallocarray() is not zeroed, and callers accumulate into it */ + return &a->s[i]; +} + +static void count_pool(struct pp_stat *s, unsigned int l, + struct netdev_page_pool_get_rsp *pp) /* add one pool to bucket live[l]; inflight fields are summed only when present in the reply */ +{ + s->live[l].cnt++; + if (pp->_present.inflight) + s->live[l].refs += pp->inflight; + if (pp->_present.inflight_mem) + s->live[l].bytes += pp->inflight_mem; +} + +/* We don't know how many pages are sitting in cache and ring + * so we will under-count the recycling rate a bit. + */ +static void print_json_recycling_stats(struct pp_stat *s) +{ + double recycle; + + if (s->alloc_fast + s->alloc_slow) { + recycle = (double)(s->recycle_ring + s->recycle_cache) / + (s->alloc_fast + s->alloc_slow) * 100; + jsonw_float_field(json_wtr, "recycling_pct", recycle); + } + + jsonw_name(json_wtr, "alloc"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "slow", s->alloc_slow); + jsonw_uint_field(json_wtr, "fast", s->alloc_fast); + jsonw_end_object(json_wtr); + + jsonw_name(json_wtr, "recycle"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ring", s->recycle_ring); + jsonw_uint_field(json_wtr, "cache", s->recycle_cache); + jsonw_end_object(json_wtr); +} + +static void print_plain_recycling_stats(struct pp_stat *s) +{ + double recycle; + + if (s->alloc_fast + s->alloc_slow) { + recycle = (double)(s->recycle_ring + s->recycle_cache) / + (s->alloc_fast + s->alloc_slow) * 100; + printf("recycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)", + recycle, s->alloc_slow, s->alloc_fast, + s->recycle_ring, s->recycle_cache); + } +} + +static void print_json_stats(struct pp_stats_array *a) +{ + jsonw_start_array(json_wtr); + + for (unsigned int i = 0; i
< a->i; i++) { + char ifname[IF_NAMESIZE]; + struct pp_stat *s = &a->s[i]; + const char *name; + + jsonw_start_object(json_wtr); + + if (!s->ifc) { + jsonw_string_field(json_wtr, "ifname", ""); + jsonw_uint_field(json_wtr, "ifindex", 0); + } else { + name = if_indextoname(s->ifc, ifname); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", s->ifc); + } + + jsonw_uint_field(json_wtr, "page_pools", s->live[1].cnt); + jsonw_uint_field(json_wtr, "zombies", s->live[0].cnt); + + jsonw_name(json_wtr, "live"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "refs", s->live[1].refs); + jsonw_uint_field(json_wtr, "bytes", s->live[1].bytes); + jsonw_end_object(json_wtr); + + jsonw_name(json_wtr, "zombie"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "refs", s->live[0].refs); + jsonw_uint_field(json_wtr, "bytes", s->live[0].bytes); + jsonw_end_object(json_wtr); + + if (s->alloc_fast || s->alloc_slow) + print_json_recycling_stats(s); + + jsonw_end_object(json_wtr); + } + + jsonw_end_array(json_wtr); +} + +static void print_plain_stats(struct pp_stats_array *a) +{ + for (unsigned int i = 0; i < a->i; i++) { + char ifname[IF_NAMESIZE]; + struct pp_stat *s = &a->s[i]; + const char *name; + + if (!s->ifc) { + printf("\t"); + } else { + name = if_indextoname(s->ifc, ifname); + if (name) + printf("%8s", name); + printf("[%u]\t", s->ifc); + } + + printf("page pools: %u (zombies: %u)\n", + s->live[1].cnt, s->live[0].cnt); + printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n", + s->live[1].refs, s->live[1].bytes, + s->live[0].refs, s->live[0].bytes); + + if (s->alloc_fast || s->alloc_slow) { + printf("\t\t"); + print_plain_recycling_stats(s); + printf("\n"); + } + } +} + +static bool +find_pool_stat_in_list(struct netdev_page_pool_stats_get_list *pp_stats, + __u64 pool_id, struct pp_stat *pstat) +{ + ynl_dump_foreach(pp_stats, pp) { + if (!pp->_present.info || !pp->info._present.id) + continue; + if 
(pp->info.id != pool_id) + continue; + + memset(pstat, 0, sizeof(*pstat)); + if (pp->_present.alloc_fast) + pstat->alloc_fast = pp->alloc_fast; + if (pp->_present.alloc_refill) + pstat->alloc_fast += pp->alloc_refill; + if (pp->_present.alloc_slow) + pstat->alloc_slow = pp->alloc_slow; + if (pp->_present.recycle_ring) + pstat->recycle_ring = pp->recycle_ring; + if (pp->_present.recycle_cached) + pstat->recycle_cache = pp->recycle_cached; + return true; + } + return false; +} + +static void +print_json_pool_list(struct netdev_page_pool_get_list *pools, + struct netdev_page_pool_stats_get_list *pp_stats, + bool zombies_only) +{ + jsonw_start_array(json_wtr); + + ynl_dump_foreach(pools, pp) { + char ifname[IF_NAMESIZE]; + struct pp_stat pstat; + const char *name; + + if (zombies_only && !pp->_present.detach_time) + continue; + + jsonw_start_object(json_wtr); + + jsonw_uint_field(json_wtr, "id", pp->id); + + if (pp->_present.ifindex) { + name = if_indextoname(pp->ifindex, ifname); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", pp->ifindex); + } + + if (pp->_present.napi_id) + jsonw_uint_field(json_wtr, "napi_id", pp->napi_id); + + if (pp->_present.inflight) + jsonw_uint_field(json_wtr, "refs", pp->inflight); + + if (pp->_present.inflight_mem) + jsonw_uint_field(json_wtr, "bytes", pp->inflight_mem); + + if (pp->_present.detach_time) + jsonw_uint_field(json_wtr, "detach_time", pp->detach_time); + + if (pp->_present.dmabuf) + jsonw_uint_field(json_wtr, "dmabuf", pp->dmabuf); + + if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) && + (pstat.alloc_fast || pstat.alloc_slow)) + print_json_recycling_stats(&pstat); + + jsonw_end_object(json_wtr); + } + + jsonw_end_array(json_wtr); +} + +static void +print_plain_pool_list(struct netdev_page_pool_get_list *pools, + struct netdev_page_pool_stats_get_list *pp_stats, + bool zombies_only) +{ + ynl_dump_foreach(pools, pp) { + char ifname[IF_NAMESIZE]; + struct pp_stat pstat; + 
const char *name; + + if (zombies_only && !pp->_present.detach_time) + continue; + + printf("pool id: %llu", pp->id); + + if (pp->_present.ifindex) { + name = if_indextoname(pp->ifindex, ifname); + if (name) + printf(" dev: %s", name); + printf("[%u]", pp->ifindex); + } + + if (pp->_present.napi_id) + printf(" napi: %llu", pp->napi_id); + + printf("\n"); + + if (pp->_present.inflight || pp->_present.inflight_mem) { + printf(" inflight:"); + if (pp->_present.inflight) + printf(" %llu pages", pp->inflight); + if (pp->_present.inflight_mem) + printf(" %llu bytes", pp->inflight_mem); + printf("\n"); + } + + if (pp->_present.detach_time) + printf(" detached: %llu\n", pp->detach_time); + + if (pp->_present.dmabuf) + printf(" dmabuf: %u\n", pp->dmabuf); + + if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) && + (pstat.alloc_fast || pstat.alloc_slow)) { + printf(" "); + print_plain_recycling_stats(&pstat); + printf("\n"); + } + } +} + +static void aggregate_device_stats(struct pp_stats_array *a, + struct netdev_page_pool_get_list *pools, + struct netdev_page_pool_stats_get_list *pp_stats) +{ + ynl_dump_foreach(pools, pp) { + struct pp_stat *s = find_ifc(a, pp->ifindex); + + count_pool(s, 1, pp); + if (pp->_present.detach_time) + count_pool(s, 0, pp); + } + + ynl_dump_foreach(pp_stats, pp) { + struct pp_stat *s = find_ifc(a, pp->info.ifindex); + + if (pp->_present.alloc_fast) + s->alloc_fast += pp->alloc_fast; + if (pp->_present.alloc_refill) + s->alloc_fast += pp->alloc_refill; + if (pp->_present.alloc_slow) + s->alloc_slow += pp->alloc_slow; + if (pp->_present.recycle_ring) + s->recycle_ring += pp->recycle_ring; + if (pp->_present.recycle_cached) + s->recycle_cache += pp->recycle_cached; + } +} + +static int do_stats(int argc, char **argv) +{ + struct netdev_page_pool_stats_get_list *pp_stats; + struct netdev_page_pool_get_list *pools; + enum { + GROUP_BY_DEVICE, + GROUP_BY_POOL, + } group_by = GROUP_BY_DEVICE; + bool zombies_only = false; + struct pp_stats_array a = 
{}; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret = 0; + + /* Parse options */ + while (argc > 0) { + if (is_prefix(*argv, "group-by")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + return -1; + + if (is_prefix(*argv, "device")) { + group_by = GROUP_BY_DEVICE; + } else if (is_prefix(*argv, "pp") || + is_prefix(*argv, "page-pool") || + is_prefix(*argv, "none")) { + group_by = GROUP_BY_POOL; + } else { + p_err("invalid group-by value '%s'", *argv); + return -1; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "zombies")) { + zombies_only = true; + group_by = GROUP_BY_POOL; + NEXT_ARG(); + } else { + p_err("unknown option '%s'", *argv); + return -1; + } + } + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return -1; + } + + pools = netdev_page_pool_get_dump(ys); + if (!pools) { + p_err("failed to get page pools: %s", ys->err.msg); + ret = -1; + goto exit_close; + } + + pp_stats = netdev_page_pool_stats_get_dump(ys); + if (!pp_stats) { + p_err("failed to get page pool stats: %s", ys->err.msg); + ret = -1; + goto exit_free_pp_list; + } + + /* If grouping by pool, print individual pools */ + if (group_by == GROUP_BY_POOL) { + if (json_output) + print_json_pool_list(pools, pp_stats, zombies_only); + else + print_plain_pool_list(pools, pp_stats, zombies_only); + } else { + /* Aggregated stats mode (group-by device) */ + a.max = 64; + a.s = calloc(a.max, sizeof(*a.s)); + if (!a.s) { + p_err("failed to allocate stats array"); + ret = -1; + goto exit_free_stats_list; + } + + aggregate_device_stats(&a, pools, pp_stats); + + if (json_output) + print_json_stats(&a); + else + print_plain_stats(&a); + + free(a.s); + } + +exit_free_stats_list: + netdev_page_pool_stats_get_list_free(pp_stats); +exit_free_pp_list: + netdev_page_pool_get_list_free(pools); +exit_close: + ynl_sock_destroy(ys); + return ret; +} + +static int do_help(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + 
jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s page-pool { COMMAND | help }\n" + " %s page-pool stats [ OPTIONS ]\n" + "\n" + " OPTIONS := { group-by { device | page-pool | none } | zombies }\n" + "\n" + " stats - Display page pool statistics\n" + " stats group-by device - Group statistics by network device (default)\n" + " stats group-by page-pool | pp | none\n" + " - Show individual page pool details (no grouping)\n" + " stats zombies - Show only zombie page pools (detached but with\n" + " pages in flight). Implies group-by page-pool.\n" + "", + bin_name, bin_name); + + return 0; +} + +static const struct cmd page_pool_cmds[] = { + { "help", do_help }, + { "stats", do_stats }, + { 0 } +}; + +int do_page_pool(int argc, char **argv) +{ + return cmd_select(page_pool_cmds, argc, argv, do_help); +} From 3f0a638d45fcf98f7a53ece0aadf928dfc328a06 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Nov 2025 08:22:26 -0800 Subject: [PATCH 747/867] tools: ynltool: add qstats support $ ynltool qstat eth0 rx-packets: 493192163 rx-bytes: 1442544543997 tx-packets: 745999838 tx-bytes: 4574215826482 tx-stop: 7033 tx-wake: 7033 $ ynltool qstat show group-by queue eth0 rx-0 packets: 70196880 bytes: 178633973750 eth0 rx-1 packets: 63623419 bytes: 197274745250 ... eth0 tx-1 packets: 98645810 bytes: 631247647938 stop: 1048 wake: 1048 eth0 tx-2 packets: 86775824 bytes: 563930471952 stop: 1126 wake: 1126 ... 
$ ynltool -j qstat | jq [ { "ifname": "eth0", "ifindex": 2, "rx": { "packets": 493396439, "bytes": 1443608198921 }, "tx": { "packets": 746239978, "bytes": 4574333772645, "stop": 7072, "wake": 7072 } } ] Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20251107162227.980672-4-kuba@kernel.org Acked-by: Stanislav Fomichev Signed-off-by: Paolo Abeni --- tools/net/ynl/ynltool/main.c | 3 +- tools/net/ynl/ynltool/main.h | 1 + tools/net/ynl/ynltool/qstats.c | 330 +++++++++++++++++++++++++++++++++ 3 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 tools/net/ynl/ynltool/qstats.c diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c index f83c6f3245c8c..5d0f428eed0a7 100644 --- a/tools/net/ynl/ynltool/main.c +++ b/tools/net/ynl/ynltool/main.c @@ -47,7 +47,7 @@ static int do_help(int argc __attribute__((unused)), "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n" " %s version\n" "\n" - " OBJECT := { page-pool }\n" + " OBJECT := { page-pool | qstats }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name); @@ -72,6 +72,7 @@ static int do_version(int argc __attribute__((unused)), static const struct cmd commands[] = { { "help", do_help }, { "page-pool", do_page_pool }, + { "qstats", do_qstats }, { "version", do_version }, { 0 } }; diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h index fd05d21451a21..c7039f9ac55a6 100644 --- a/tools/net/ynl/ynltool/main.h +++ b/tools/net/ynl/ynltool/main.h @@ -61,5 +61,6 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, /* subcommands */ int do_page_pool(int argc, char **argv); +int do_qstats(int argc, char **argv); #endif /* __YNLTOOL_H */ diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c new file mode 100644 index 0000000000000..fcdbb6d9a8525 --- /dev/null +++ b/tools/net/ynl/ynltool/qstats.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +#include +#include +#include +#include +#include + +#include 
+#include "netdev-user.h" + +#include "main.h" + +static enum netdev_qstats_scope scope; /* default - device */ + +static void print_json_qstats(struct netdev_qstats_get_list *qstats) +{ + jsonw_start_array(json_wtr); + + ynl_dump_foreach(qstats, qs) { + char ifname[IF_NAMESIZE]; + const char *name; + + jsonw_start_object(json_wtr); + + name = if_indextoname(qs->ifindex, ifname); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", qs->ifindex); + + if (qs->_present.queue_type) + jsonw_string_field(json_wtr, "queue-type", + netdev_queue_type_str(qs->queue_type)); + if (qs->_present.queue_id) + jsonw_uint_field(json_wtr, "queue-id", qs->queue_id); + + if (qs->_present.rx_packets || qs->_present.rx_bytes || + qs->_present.rx_alloc_fail || qs->_present.rx_hw_drops || + qs->_present.rx_csum_complete || qs->_present.rx_hw_gro_packets) { + jsonw_name(json_wtr, "rx"); + jsonw_start_object(json_wtr); + if (qs->_present.rx_packets) + jsonw_uint_field(json_wtr, "packets", qs->rx_packets); + if (qs->_present.rx_bytes) + jsonw_uint_field(json_wtr, "bytes", qs->rx_bytes); + if (qs->_present.rx_alloc_fail) + jsonw_uint_field(json_wtr, "alloc-fail", qs->rx_alloc_fail); + if (qs->_present.rx_hw_drops) + jsonw_uint_field(json_wtr, "hw-drops", qs->rx_hw_drops); + if (qs->_present.rx_hw_drop_overruns) + jsonw_uint_field(json_wtr, "hw-drop-overruns", qs->rx_hw_drop_overruns); + if (qs->_present.rx_hw_drop_ratelimits) + jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->rx_hw_drop_ratelimits); + if (qs->_present.rx_csum_complete) + jsonw_uint_field(json_wtr, "csum-complete", qs->rx_csum_complete); + if (qs->_present.rx_csum_unnecessary) + jsonw_uint_field(json_wtr, "csum-unnecessary", qs->rx_csum_unnecessary); + if (qs->_present.rx_csum_none) + jsonw_uint_field(json_wtr, "csum-none", qs->rx_csum_none); + if (qs->_present.rx_csum_bad) + jsonw_uint_field(json_wtr, "csum-bad", qs->rx_csum_bad); + if (qs->_present.rx_hw_gro_packets) + 
jsonw_uint_field(json_wtr, "hw-gro-packets", qs->rx_hw_gro_packets); + if (qs->_present.rx_hw_gro_bytes) + jsonw_uint_field(json_wtr, "hw-gro-bytes", qs->rx_hw_gro_bytes); + if (qs->_present.rx_hw_gro_wire_packets) + jsonw_uint_field(json_wtr, "hw-gro-wire-packets", qs->rx_hw_gro_wire_packets); + if (qs->_present.rx_hw_gro_wire_bytes) + jsonw_uint_field(json_wtr, "hw-gro-wire-bytes", qs->rx_hw_gro_wire_bytes); + jsonw_end_object(json_wtr); + } + + if (qs->_present.tx_packets || qs->_present.tx_bytes || + qs->_present.tx_hw_drops || qs->_present.tx_csum_none || + qs->_present.tx_hw_gso_packets) { + jsonw_name(json_wtr, "tx"); + jsonw_start_object(json_wtr); + if (qs->_present.tx_packets) + jsonw_uint_field(json_wtr, "packets", qs->tx_packets); + if (qs->_present.tx_bytes) + jsonw_uint_field(json_wtr, "bytes", qs->tx_bytes); + if (qs->_present.tx_hw_drops) + jsonw_uint_field(json_wtr, "hw-drops", qs->tx_hw_drops); + if (qs->_present.tx_hw_drop_errors) + jsonw_uint_field(json_wtr, "hw-drop-errors", qs->tx_hw_drop_errors); + if (qs->_present.tx_hw_drop_ratelimits) + jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->tx_hw_drop_ratelimits); + if (qs->_present.tx_csum_none) + jsonw_uint_field(json_wtr, "csum-none", qs->tx_csum_none); + if (qs->_present.tx_needs_csum) + jsonw_uint_field(json_wtr, "needs-csum", qs->tx_needs_csum); + if (qs->_present.tx_hw_gso_packets) + jsonw_uint_field(json_wtr, "hw-gso-packets", qs->tx_hw_gso_packets); + if (qs->_present.tx_hw_gso_bytes) + jsonw_uint_field(json_wtr, "hw-gso-bytes", qs->tx_hw_gso_bytes); + if (qs->_present.tx_hw_gso_wire_packets) + jsonw_uint_field(json_wtr, "hw-gso-wire-packets", qs->tx_hw_gso_wire_packets); + if (qs->_present.tx_hw_gso_wire_bytes) + jsonw_uint_field(json_wtr, "hw-gso-wire-bytes", qs->tx_hw_gso_wire_bytes); + if (qs->_present.tx_stop) + jsonw_uint_field(json_wtr, "stop", qs->tx_stop); + if (qs->_present.tx_wake) + jsonw_uint_field(json_wtr, "wake", qs->tx_wake); + jsonw_end_object(json_wtr); + } + + 
jsonw_end_object(json_wtr); + } + + jsonw_end_array(json_wtr); +} + +static void print_one(bool present, const char *name, unsigned long long val, + int *line) +{ + if (!present) + return; + + if (!*line) { /* *line == 0: a fresh output row, emit the indent first */ + printf(" "); + ++(*line); + } + + /* Don't waste space on tx- and rx- prefix, it's implied by queue type */ + if (scope == NETDEV_QSTATS_SCOPE_QUEUE && + (name[0] == 'r' || name[0] == 't') && + name[1] == 'x' && name[2] == '-') + name += 3; + + printf(" %15s: %15llu", name, val); + + if (++(*line) == 3) { /* two counters printed on this row: end it and reset the column counter */ + printf("\n"); + *line = 0; + } +} + +static void print_plain_qstats(struct netdev_qstats_get_list *qstats) +{ + ynl_dump_foreach(qstats, qs) { + char ifname[IF_NAMESIZE]; + const char *name; + int n; + + name = if_indextoname(qs->ifindex, ifname); + if (name) + printf("%s", name); + else + printf("ifindex:%u", qs->ifindex); + + if (qs->_present.queue_type && qs->_present.queue_id) + printf("\t%s-%-3u", + netdev_queue_type_str(qs->queue_type), + qs->queue_id); + else + printf("\t "); + + n = 1; /* the row already starts with the device/queue label, so print_one() skips its indent */ + + /* Basic counters */ + print_one(qs->_present.rx_packets, "rx-packets", qs->rx_packets, &n); + print_one(qs->_present.rx_bytes, "rx-bytes", qs->rx_bytes, &n); + print_one(qs->_present.tx_packets, "tx-packets", qs->tx_packets, &n); + print_one(qs->_present.tx_bytes, "tx-bytes", qs->tx_bytes, &n); + + /* RX error/drop counters */ + print_one(qs->_present.rx_alloc_fail, "rx-alloc-fail", + qs->rx_alloc_fail, &n); + print_one(qs->_present.rx_hw_drops, "rx-hw-drops", + qs->rx_hw_drops, &n); + print_one(qs->_present.rx_hw_drop_overruns, "rx-hw-drop-overruns", + qs->rx_hw_drop_overruns, &n); + print_one(qs->_present.rx_hw_drop_ratelimits, "rx-hw-drop-ratelimits", + qs->rx_hw_drop_ratelimits, &n); + + /* RX checksum counters */ + print_one(qs->_present.rx_csum_complete, "rx-csum-complete", + qs->rx_csum_complete, &n); + print_one(qs->_present.rx_csum_unnecessary, "rx-csum-unnecessary", + qs->rx_csum_unnecessary, &n); + print_one(qs->_present.rx_csum_none, "rx-csum-none", +
qs->rx_csum_none, &n); + print_one(qs->_present.rx_csum_bad, "rx-csum-bad", + qs->rx_csum_bad, &n); + + /* RX GRO counters */ + print_one(qs->_present.rx_hw_gro_packets, "rx-hw-gro-packets", + qs->rx_hw_gro_packets, &n); + print_one(qs->_present.rx_hw_gro_bytes, "rx-hw-gro-bytes", + qs->rx_hw_gro_bytes, &n); + print_one(qs->_present.rx_hw_gro_wire_packets, "rx-hw-gro-wire-packets", + qs->rx_hw_gro_wire_packets, &n); + print_one(qs->_present.rx_hw_gro_wire_bytes, "rx-hw-gro-wire-bytes", + qs->rx_hw_gro_wire_bytes, &n); + + /* TX error/drop counters */ + print_one(qs->_present.tx_hw_drops, "tx-hw-drops", + qs->tx_hw_drops, &n); + print_one(qs->_present.tx_hw_drop_errors, "tx-hw-drop-errors", + qs->tx_hw_drop_errors, &n); + print_one(qs->_present.tx_hw_drop_ratelimits, "tx-hw-drop-ratelimits", + qs->tx_hw_drop_ratelimits, &n); + + /* TX checksum counters */ + print_one(qs->_present.tx_csum_none, "tx-csum-none", + qs->tx_csum_none, &n); + print_one(qs->_present.tx_needs_csum, "tx-needs-csum", + qs->tx_needs_csum, &n); + + /* TX GSO counters */ + print_one(qs->_present.tx_hw_gso_packets, "tx-hw-gso-packets", + qs->tx_hw_gso_packets, &n); + print_one(qs->_present.tx_hw_gso_bytes, "tx-hw-gso-bytes", + qs->tx_hw_gso_bytes, &n); + print_one(qs->_present.tx_hw_gso_wire_packets, "tx-hw-gso-wire-packets", + qs->tx_hw_gso_wire_packets, &n); + print_one(qs->_present.tx_hw_gso_wire_bytes, "tx-hw-gso-wire-bytes", + qs->tx_hw_gso_wire_bytes, &n); + + /* TX queue control */ + print_one(qs->_present.tx_stop, "tx-stop", qs->tx_stop, &n); + print_one(qs->_present.tx_wake, "tx-wake", qs->tx_wake, &n); + + if (n) + printf("\n"); + } +} + +static int do_show(int argc, char **argv) +{ + struct netdev_qstats_get_list *qstats; + struct netdev_qstats_get_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret = 0; + + /* Parse options */ + while (argc > 0) { + if (is_prefix(*argv, "scope") || is_prefix(*argv, "group-by")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + return -1; + + if 
(is_prefix(*argv, "queue")) { + scope = NETDEV_QSTATS_SCOPE_QUEUE; + } else if (is_prefix(*argv, "device")) { + scope = 0; + } else { + p_err("invalid scope value '%s'", *argv); + return -1; + } + NEXT_ARG(); + } else { + p_err("unknown option '%s'", *argv); + return -1; + } + } + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return -1; + } + + req = netdev_qstats_get_req_alloc(); + if (!req) { + p_err("failed to allocate qstats request"); + ret = -1; + goto exit_close; + } + + if (scope) + netdev_qstats_get_req_set_scope(req, scope); + + qstats = netdev_qstats_get_dump(ys, req); + netdev_qstats_get_req_free(req); + if (!qstats) { + p_err("failed to get queue stats: %s", ys->err.msg); + ret = -1; + goto exit_close; + } + + /* Print the stats as returned by the kernel */ + if (json_output) + print_json_qstats(qstats); + else + print_plain_qstats(qstats); + + netdev_qstats_get_list_free(qstats); +exit_close: + ynl_sock_destroy(ys); + return ret; +} + +static int do_help(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s qstats { COMMAND | help }\n" + " %s qstats [ show ] [ OPTIONS ]\n" + "\n" + " OPTIONS := { scope queue | group-by { device | queue } }\n" + "\n" + " show - Display queue statistics (default)\n" + " Statistics are aggregated for the entire device.\n" + " show scope queue - Display per-queue statistics\n" + " show group-by device - Display device-aggregated statistics (default)\n" + " show group-by queue - Display per-queue statistics\n" + "", + bin_name, bin_name); + + return 0; +} + +static const struct cmd qstats_cmds[] = { + { "show", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_qstats(int argc, char **argv) +{ + return cmd_select(qstats_cmds, argc, argv, do_help); +} From 9eef97a9dea3d059cb719a26a055ff09933e964e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 
Nov 2025 08:22:27 -0800 Subject: [PATCH 748/867] tools: ynltool: add traffic distribution balance The main if not only use case for per-queue stats today is checking for traffic imbalance. Add simple traffic balance analysis to qstats. $ ynltool qstat balance eth0 rx 44 queues: rx-packets : cv=6.9% ns=24.2% stddev=512006493 min=6278921110 max=8011570575 mean=7437054644 rx-bytes : cv=6.9% ns=24.1% stddev=759670503060 min=9326315769440 max=11884393670786 mean=11035439201354 ... $ ynltool -j qstat balance | jq [ { "ifname": "eth0", "ifindex": 2, "queue-type": "rx", "rx-packets": { "queue-count": 44, "min": 6278301665, "max": 8010780185, "mean": 7.43635E+9, "stddev": 5.12012E+8, "coefficient-of-variation": 6.88525, "normalized-spread": 24.249 }, ... Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20251107162227.980672-5-kuba@kernel.org Acked-by: Stanislav Fomichev Signed-off-by: Paolo Abeni --- tools/net/ynl/ynltool/Makefile | 2 +- tools/net/ynl/ynltool/qstats.c | 293 ++++++++++++++++++++++++++++++++- 2 files changed, 293 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile index 11240740ed810..86c30b7420cf7 100644 --- a/tools/net/ynl/ynltool/Makefile +++ b/tools/net/ynl/ynltool/Makefile @@ -31,7 +31,7 @@ Q = @ $(YNLTOOL): ../libynl.a $(OBJS) $(Q)echo -e "\tLINK $@" - $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lmnl + $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lmnl -lm %.o: %.c ../libynl.a $(Q)echo -e "\tCC $@" diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c index fcdbb6d9a8525..31fb45709ffab 100644 --- a/tools/net/ynl/ynltool/qstats.c +++ b/tools/net/ynl/ynltool/qstats.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "netdev-user.h" @@ -13,6 +14,16 @@ static enum netdev_qstats_scope scope; /* default - device */ +struct queue_balance { + unsigned int ifindex; + enum netdev_queue_type type; + unsigned int queue_count; + __u64 *rx_packets; + 
__u64 *rx_bytes; + __u64 *tx_packets; + __u64 *tx_bytes; +}; + static void print_json_qstats(struct netdev_qstats_get_list *qstats) { jsonw_start_array(json_wtr); @@ -293,6 +304,283 @@ static int do_show(int argc, char **argv) return ret; } +static void compute_stats(__u64 *values, unsigned int count, + double *mean, double *stddev, __u64 *min, __u64 *max) +{ + double sum = 0.0, variance = 0.0; + unsigned int i; + + *min = ~0ULL; + *max = 0; + + if (count == 0) { + *mean = 0; + *stddev = 0; + *min = 0; + return; + } + + for (i = 0; i < count; i++) { + sum += values[i]; + if (values[i] < *min) + *min = values[i]; + if (values[i] > *max) + *max = values[i]; + } + + *mean = sum / count; + + if (count > 1) { + for (i = 0; i < count; i++) { + double diff = values[i] - *mean; + + variance += diff * diff; + } + *stddev = sqrt(variance / (count - 1)); + } else { + *stddev = 0; + } +} + +static void print_balance_stats(const char *name, enum netdev_queue_type type, + __u64 *values, unsigned int count) +{ + double mean, stddev, cv, ns; + __u64 min, max; + + if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) || + (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX)) + return; + + compute_stats(values, count, &mean, &stddev, &min, &max); + + cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0; + ns = min + max > 0 ? (double)2 * (max - min) / (max + min) * 100 : 0.0; + + printf(" %-12s: cv=%.1f%% ns=%.1f%% stddev=%.0f\n", + name, cv, ns, stddev); + printf(" %-12s min=%llu max=%llu mean=%.0f\n", + "", min, max, mean); +} + +static void +print_balance_stats_json(const char *name, enum netdev_queue_type type, + __u64 *values, unsigned int count) +{ + double mean, stddev, cv, ns; + __u64 min, max; + + if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) || + (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX)) + return; + + compute_stats(values, count, &mean, &stddev, &min, &max); + + cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0; + ns = min + max > 0 ? 
(double)2 * (max - min) / (max + min) * 100 : 0.0; + + jsonw_name(json_wtr, name); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "queue-count", count); + jsonw_uint_field(json_wtr, "min", min); + jsonw_uint_field(json_wtr, "max", max); + jsonw_float_field(json_wtr, "mean", mean); + jsonw_float_field(json_wtr, "stddev", stddev); + jsonw_float_field(json_wtr, "coefficient-of-variation", cv); + jsonw_float_field(json_wtr, "normalized-spread", ns); + jsonw_end_object(json_wtr); +} + +static int cmp_ifindex_type(const void *a, const void *b) +{ + const struct netdev_qstats_get_rsp *qa = a; + const struct netdev_qstats_get_rsp *qb = b; + + if (qa->ifindex != qb->ifindex) + return qa->ifindex - qb->ifindex; + if (qa->queue_type != qb->queue_type) + return qa->queue_type - qb->queue_type; + return qa->queue_id - qb->queue_id; +} + +static int do_balance(int argc, char **argv __attribute__((unused))) +{ + struct netdev_qstats_get_list *qstats; + struct netdev_qstats_get_req *req; + struct netdev_qstats_get_rsp **sorted; + struct ynl_error yerr; + struct ynl_sock *ys; + unsigned int count = 0; + unsigned int i, j; + int ret = 0; + + if (argc > 0) { + p_err("balance command takes no arguments"); + return -1; + } + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return -1; + } + + req = netdev_qstats_get_req_alloc(); + if (!req) { + p_err("failed to allocate qstats request"); + ret = -1; + goto exit_close; + } + + /* Always use queue scope for balance analysis */ + netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE); + + qstats = netdev_qstats_get_dump(ys, req); + netdev_qstats_get_req_free(req); + if (!qstats) { + p_err("failed to get queue stats: %s", ys->err.msg); + ret = -1; + goto exit_close; + } + + /* Count and sort queues */ + ynl_dump_foreach(qstats, qs) + count++; + + if (count == 0) { + if (json_output) + jsonw_start_array(json_wtr); + else + printf("No queue statistics available\n"); + goto 
exit_free_qstats; + } + + sorted = calloc(count, sizeof(*sorted)); + if (!sorted) { + p_err("failed to allocate sorted array"); + ret = -1; + goto exit_free_qstats; + } + + i = 0; + ynl_dump_foreach(qstats, qs) + sorted[i++] = qs; + + qsort(sorted, count, sizeof(*sorted), cmp_ifindex_type); + + if (json_output) + jsonw_start_array(json_wtr); + + /* Process each device/queue-type combination */ + i = 0; + while (i < count) { + __u64 *rx_packets, *rx_bytes, *tx_packets, *tx_bytes; + enum netdev_queue_type type = sorted[i]->queue_type; + unsigned int ifindex = sorted[i]->ifindex; + unsigned int queue_count = 0; + char ifname[IF_NAMESIZE]; + const char *name; + + /* Count queues for this device/type */ + for (j = i; j < count && sorted[j]->ifindex == ifindex && + sorted[j]->queue_type == type; j++) + queue_count++; + + /* Skip if no packets/bytes (inactive queues) */ + if (!sorted[i]->_present.rx_packets && + !sorted[i]->_present.rx_bytes && + !sorted[i]->_present.tx_packets && + !sorted[i]->_present.tx_bytes) + goto next_ifc; + + /* Allocate arrays for statistics */ + rx_packets = calloc(queue_count, sizeof(*rx_packets)); + rx_bytes = calloc(queue_count, sizeof(*rx_bytes)); + tx_packets = calloc(queue_count, sizeof(*tx_packets)); + tx_bytes = calloc(queue_count, sizeof(*tx_bytes)); + + if (!rx_packets || !rx_bytes || !tx_packets || !tx_bytes) { + p_err("failed to allocate statistics arrays"); + free(rx_packets); + free(rx_bytes); + free(tx_packets); + free(tx_bytes); + ret = -1; + goto exit_free_sorted; + } + + /* Collect statistics */ + for (j = 0; j < queue_count; j++) { + rx_packets[j] = sorted[i + j]->_present.rx_packets ? + sorted[i + j]->rx_packets : 0; + rx_bytes[j] = sorted[i + j]->_present.rx_bytes ? + sorted[i + j]->rx_bytes : 0; + tx_packets[j] = sorted[i + j]->_present.tx_packets ? + sorted[i + j]->tx_packets : 0; + tx_bytes[j] = sorted[i + j]->_present.tx_bytes ? 
+ sorted[i + j]->tx_bytes : 0; + } + + name = if_indextoname(ifindex, ifname); + + if (json_output) { + jsonw_start_object(json_wtr); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", ifindex); + jsonw_string_field(json_wtr, "queue-type", + netdev_queue_type_str(type)); + + print_balance_stats_json("rx-packets", type, + rx_packets, queue_count); + print_balance_stats_json("rx-bytes", type, + rx_bytes, queue_count); + print_balance_stats_json("tx-packets", type, + tx_packets, queue_count); + print_balance_stats_json("tx-bytes", type, + tx_bytes, queue_count); + + jsonw_end_object(json_wtr); + } else { + if (name) + printf("%s", name); + else + printf("ifindex:%u", ifindex); + printf(" %s %d queues:\n", + netdev_queue_type_str(type), queue_count); + + print_balance_stats("rx-packets", type, + rx_packets, queue_count); + print_balance_stats("rx-bytes", type, + rx_bytes, queue_count); + print_balance_stats("tx-packets", type, + tx_packets, queue_count); + print_balance_stats("tx-bytes", type, + tx_bytes, queue_count); + printf("\n"); + } + + free(rx_packets); + free(rx_bytes); + free(tx_packets); + free(tx_bytes); + +next_ifc: + i += queue_count; + } + + if (json_output) + jsonw_end_array(json_wtr); + +exit_free_sorted: + free(sorted); +exit_free_qstats: + netdev_qstats_get_list_free(qstats); +exit_close: + ynl_sock_destroy(ys); + return ret; +} + static int do_help(int argc __attribute__((unused)), char **argv __attribute__((unused))) { @@ -304,6 +592,7 @@ static int do_help(int argc __attribute__((unused)), fprintf(stderr, "Usage: %s qstats { COMMAND | help }\n" " %s qstats [ show ] [ OPTIONS ]\n" + " %s qstats balance\n" "\n" " OPTIONS := { scope queue | group-by { device | queue } }\n" "\n" @@ -312,14 +601,16 @@ static int do_help(int argc __attribute__((unused)), " show scope queue - Display per-queue statistics\n" " show group-by device - Display device-aggregated statistics (default)\n" " show group-by queue - 
Display per-queue statistics\n" + " balance - Analyze traffic distribution balance.\n" "", - bin_name, bin_name); + bin_name, bin_name, bin_name); return 0; } static const struct cmd qstats_cmds[] = { { "show", do_show }, + { "balance", do_balance }, { "help", do_help }, { 0 } }; From 0e535824d0bcf7c9bb0532d902283c31c78cd6f3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 7 Nov 2025 23:04:02 -0800 Subject: [PATCH 749/867] devlink: Introduce switchdev_inactive eswitch mode Adds DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE attribute to UAPI and documentation. Before having traffic flow through an eswitch, a user may want to have the ability to block traffic towards the FDB until FDB is fully programmed and the user is ready to send traffic to it. For example: when two eswitches are present for vports in a multi-PF setup, one eswitch may take over the traffic from the other when the user chooses. Before this take over, a user may want to first program the inactive eswitch and then once ready redirect traffic to this new eswitch. switchdev modes transition semantics: legacy->switchdev_inactive: Create switchdev mode normally, traffic not allowed to flow yet. switchdev_inactive->switchdev: Enable traffic to flow. switchdev->switchdev_inactive: Block traffic on the FDB, FDB and representors state and content are preserved. When eswitch is configured to this mode, traffic is ignored/dropped on this eswitch FDB, while current configuration is kept, e.g. FDB rules and netdev representors are kept available, FDB programming is allowed. Example: # start inactive switchdev devlink dev eswitch set pci/0000:08:00.1 mode switchdev_inactive # setup TC rules, representors etc .. 
# activate devlink dev eswitch set pci/0000:08:00.1 mode switchdev Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251108070404.1551708-2-saeed@kernel.org Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/devlink.yaml | 2 ++ .../networking/devlink/devlink-eswitch-attr.rst | 13 +++++++++++++ include/uapi/linux/devlink.h | 1 + net/devlink/netlink_gen.c | 2 +- 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 3db59c9658694..426d5aa7d9551 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -99,6 +99,8 @@ definitions: name: legacy - name: switchdev + - + name: switchdev-inactive - type: enum name: eswitch-inline-mode diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst index 08bb39ab15286..eafe09abc40c2 100644 --- a/Documentation/networking/devlink/devlink-eswitch-attr.rst +++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst @@ -39,6 +39,10 @@ The following is a list of E-Switch attributes. rules. * ``switchdev`` allows for more advanced offloading capabilities of the E-Switch to hardware. + * ``switchdev_inactive`` switchdev mode but starts inactive, doesn't allow traffic + until explicitly activated. This mode is useful for orchestrators that + want to prepare the device in switchdev mode but only activate it when + all configurations are done. * - ``inline-mode`` - enum - Some HWs need the VF driver to put part of the packet @@ -74,3 +78,12 @@ Example Usage # enable encap-mode with legacy mode $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic + + # start switchdev mode in inactive state + $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev_inactive + + # setup switchdev configurations, representors, FDB entries, etc.. + ... 
+ + # activate switchdev mode to allow traffic + $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index bcad11a787a55..157f11d3fb72f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -181,6 +181,7 @@ enum devlink_sb_threshold_type { enum devlink_eswitch_mode { DEVLINK_ESWITCH_MODE_LEGACY, DEVLINK_ESWITCH_MODE_SWITCHDEV, + DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE, }; enum devlink_eswitch_inline_mode { diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 9fd00977d59e3..5ad435aee29de 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -229,7 +229,7 @@ static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NA static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, - [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1), + [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 2), [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3), [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), }; From 9902b6381d76ccd2e08e2703390e8c8a3bcda482 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 7 Nov 2025 23:04:03 -0800 Subject: [PATCH 750/867] net/mlx5: MPFS, add support for dynamic enable/disable MPFS (Multi PF Switch) is enabled by default in Multi-Host environments; the driver keeps a list of the desired unicast MAC addresses of all vports (VFs/SFs) and applies it to HW via the L2_table FW command. Add an API to dynamically apply the list of MACs to HW when needed; the next patches use this API to implement the devlink eswitch active/inactive uAPI. 
Signed-off-by: Saeed Mahameed Signed-off-by: Adithya Jayachandran Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251108070404.1551708-3-saeed@kernel.org Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/lib/mpfs.c | 116 +++++++++++++++--- .../ethernet/mellanox/mlx5/core/lib/mpfs.h | 9 ++ 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 4450091e181a1..99fb7a53add08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -65,13 +65,14 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) /* UC L2 table hash node */ struct l2table_node { struct l2addr_node node; - u32 index; /* index in HW l2 table */ + int index; /* index in HW l2 table */ int ref_count; }; struct mlx5_mpfs { struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; struct mutex lock; /* Synchronize l2 table access */ + bool enabled; u32 size; unsigned long *bitmap; }; @@ -114,6 +115,8 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) return -ENOMEM; } + mpfs->enabled = true; + dev->priv.mpfs = mpfs; return 0; } @@ -135,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; int err = 0; - u32 index; + int index; if (!mpfs) return 0; @@ -148,30 +151,34 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) goto out; } - err = alloc_l2table_index(mpfs, &index); - if (err) - goto out; - l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { err = -ENOMEM; - goto hash_add_err; + goto out; } - err = set_l2table_entry_cmd(dev, index, mac); - if (err) - goto set_table_entry_err; + index = -1; + + if (mpfs->enabled) { + err = alloc_l2table_index(mpfs, &index); + if (err) + goto hash_del; + err = set_l2table_entry_cmd(dev, index, mac); + if (err) + goto 
free_l2table_index; + mlx5_core_dbg(dev, "MPFS entry %pM, set @index (%d)\n", + l2addr->node.addr, l2addr->index); + } l2addr->index = index; l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); goto out; - -set_table_entry_err: - l2addr_hash_del(l2addr); -hash_add_err: +free_l2table_index: free_l2table_index(mpfs, index); +hash_del: + l2addr_hash_del(l2addr); out: mutex_unlock(&mpfs->lock); return err; @@ -183,7 +190,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; int err = 0; - u32 index; + int index; if (!mpfs) return 0; @@ -200,12 +207,87 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; index = l2addr->index; - del_l2table_entry_cmd(dev, index); + if (index >= 0) { + del_l2table_entry_cmd(dev, index); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS entry %pM, deleted @index (%d)\n", + mac, index); + } l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); unlock: mutex_unlock(&mpfs->lock); return err; } EXPORT_SYMBOL(mlx5_mpfs_del_mac); + +int mlx5_mpfs_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + struct hlist_node *n; + int err = 0, i; + + if (!mpfs) + return -ENODEV; + + mutex_lock(&mpfs->lock); + if (mpfs->enabled) + goto out; + mpfs->enabled = true; + mlx5_core_dbg(dev, "MPFS enabling mpfs\n"); + + mlx5_mpfs_foreach(l2addr, n, mpfs, i) { + u32 index; + + err = alloc_l2table_index(mpfs, &index); + if (err) { + mlx5_core_err(dev, "Failed to allocated MPFS index for %pM, err(%d)\n", + l2addr->node.addr, err); + goto out; + } + + err = set_l2table_entry_cmd(dev, index, l2addr->node.addr); + if (err) { + mlx5_core_err(dev, "Failed to set MPFS l2table entry for %pM index=%d, err(%d)\n", + l2addr->node.addr, index, err); + free_l2table_index(mpfs, index); + goto 
out; + } + + l2addr->index = index; + mlx5_core_dbg(dev, "MPFS entry %pM, set @index (%d)\n", + l2addr->node.addr, l2addr->index); + } +out: + mutex_unlock(&mpfs->lock); + return err; +} + +void mlx5_mpfs_disable(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + struct hlist_node *n; + int i; + + if (!mpfs) + return; + + mutex_lock(&mpfs->lock); + if (!mpfs->enabled) + goto unlock; + mlx5_mpfs_foreach(l2addr, n, mpfs, i) { + if (l2addr->index < 0) + continue; + del_l2table_entry_cmd(dev, l2addr->index); + free_l2table_index(mpfs, l2addr->index); + mlx5_core_dbg(dev, "MPFS entry %pM, deleted @index (%d)\n", + l2addr->node.addr, l2addr->index); + l2addr->index = -1; + } + mpfs->enabled = false; + mlx5_core_dbg(dev, "MPFS disabled\n"); +unlock: + mutex_unlock(&mpfs->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h index 4a293542a7aa1..9c63838ce1f32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -45,6 +45,10 @@ struct l2addr_node { u8 addr[ETH_ALEN]; }; +#define mlx5_mpfs_foreach(hs, tmp, mpfs, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hs, tmp, &(mpfs)->hash[i], node.hlist) + #define for_each_l2hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) @@ -82,11 +86,16 @@ struct l2addr_node { }) #ifdef CONFIG_MLX5_MPFS +struct mlx5_core_dev; int mlx5_mpfs_init(struct mlx5_core_dev *dev); void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_mpfs_enable(struct mlx5_core_dev *dev); +void mlx5_mpfs_disable(struct mlx5_core_dev *dev); #else /* #ifndef CONFIG_MLX5_MPFS */ static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +static inline int 
mlx5_mpfs_enable(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_disable(struct mlx5_core_dev *dev) {} #endif #endif From 9da611df15aa8d519f9947b88a5c733267cba888 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 7 Nov 2025 23:04:04 -0800 Subject: [PATCH 751/867] net/mlx5: E-Switch, support eswitch inactive mode Add support for eswitch switchdev inactive mode Inactive mode: Drop all traffic going to FDB, Remove mpfs l2 rules and disconnect adjacent vports. Active mode: Traffic flows through FDB, mpfs table populated, and adjacent vports are connected. Signed-off-by: Saeed Mahameed Signed-off-by: Adithya Jayachandran Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251108070404.1551708-4-saeed@kernel.org Signed-off-by: Paolo Abeni --- .../mellanox/mlx5/core/esw/adj_vport.c | 15 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 + .../mellanox/mlx5/core/eswitch_offloads.c | 207 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 5 + .../ethernet/mellanox/mlx5/core/lib/mpfs.c | 2 +- include/linux/mlx5/fs.h | 1 + 6 files changed, 214 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c index 0091ba697baec..250af09b5af23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c @@ -4,13 +4,8 @@ #include "fs_core.h" #include "eswitch.h" -enum { - MLX5_ADJ_VPORT_DISCONNECT = 0x0, - MLX5_ADJ_VPORT_CONNECT = 0x1, -}; - -static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, - u16 vport, bool connect) +int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, u16 vport, + bool connect) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; @@ -24,7 +19,7 @@ static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, MLX5_SET(modify_vport_state_in, in, egress_connect_valid, 1); MLX5_SET(modify_vport_state_in, in, ingress_connect, 
connect); MLX5_SET(modify_vport_state_in, in, egress_connect, connect); - + MLX5_SET(modify_vport_state_in, in, admin_state, connect); return mlx5_cmd_exec_in(dev, modify_vport_state, in); } @@ -96,7 +91,6 @@ static int mlx5_esw_adj_vport_create(struct mlx5_eswitch *esw, u16 vhca_id, if (err) goto acl_ns_remove; - mlx5_esw_adj_vport_modify(esw->dev, vport_num, MLX5_ADJ_VPORT_CONNECT); return 0; acl_ns_remove: @@ -117,8 +111,7 @@ static void mlx5_esw_adj_vport_destroy(struct mlx5_eswitch *esw, esw_debug(esw->dev, "Destroying adjacent vport %d for vhca_id 0x%x\n", vport_num, vport->vhca_id); - mlx5_esw_adj_vport_modify(esw->dev, vport_num, - MLX5_ADJ_VPORT_DISCONNECT); + mlx5_esw_offloads_rep_remove(esw, vport); mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, vport->index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 16eb99aba2a7e..beaec450a7343 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -264,6 +264,9 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *drop_root; + struct mlx5_flow_handle *drop_root_rule; + struct mlx5_fc *drop_root_fc; struct mlx5_flow_table *tc_miss_table; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; @@ -392,6 +395,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; u32 last_vport_idx; int mode; + bool offloads_inactive; u16 manager_vport; u16 first_host_vport; u8 num_peers; @@ -634,6 +638,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw); void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw); +int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, u16 vport, + bool connect); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4092ea29c6308..0b1a180ef238e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1577,6 +1577,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; + attr.fs_base_prio = FDB_BYPASS_PATH; chains = mlx5_chains_create(dev, &attr); if (IS_ERR(chains)) { @@ -2355,6 +2356,131 @@ static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); } +static void mlx5_esw_fdb_drop_destroy(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.offloads.drop_root) + return; + + esw_debug(esw->dev, "Destroying FDB drop root table %#x fc %#x\n", + esw->fdb_table.offloads.drop_root->id, + esw->fdb_table.offloads.drop_root_fc->id); + mlx5_del_flow_rules(esw->fdb_table.offloads.drop_root_rule); + /* Don't free flow counter here, can be reused on a later activation */ + mlx5_destroy_flow_table(esw->fdb_table.offloads.drop_root); + esw->fdb_table.offloads.drop_root_rule = NULL; + esw->fdb_table.offloads.drop_root = NULL; +} + +static int mlx5_esw_fdb_drop_create(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination drop_fc_dst = {}; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_table *table; + int err = 0, dst_num = 0; + + if (esw->fdb_table.offloads.drop_root) + return 0; + + root_ns = esw->fdb_table.offloads.ns; + + ft_attr.prio = FDB_DROP_ROOT; + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + table = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(table)) { + esw_warn(dev, 
"Failed to create fdb drop root table, err %pe\n", + table); + return PTR_ERR(table); + } + + /* Drop FC reusable, create once on first deactivation of FDB */ + if (!esw->fdb_table.offloads.drop_root_fc) { + struct mlx5_fc *counter = mlx5_fc_create(dev, 0); + + err = PTR_ERR_OR_ZERO(counter); + if (err) + esw_warn(esw->dev, "create fdb drop fc err %d\n", err); + else + esw->fdb_table.offloads.drop_root_fc = counter; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + if (esw->fdb_table.offloads.drop_root_fc) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_fc_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_fc_dst.counter = esw->fdb_table.offloads.drop_root_fc; + dst = &drop_fc_dst; + dst_num++; + } + + flow_rule = mlx5_add_flow_rules(table, NULL, &flow_act, dst, dst_num); + err = PTR_ERR_OR_ZERO(flow_rule); + if (err) { + esw_warn(esw->dev, + "fs offloads: Failed to add vport rx drop rule err %d\n", + err); + goto err_flow_rule; + } + + esw->fdb_table.offloads.drop_root = table; + esw->fdb_table.offloads.drop_root_rule = flow_rule; + esw_debug(esw->dev, "Created FDB drop root table %#x fc %#x\n", + table->id, dst ? 
dst->counter->id : 0); + return 0; + +err_flow_rule: + /* no need to free drop fc, esw_offloads_steering_cleanup will do it */ + mlx5_destroy_flow_table(table); + return err; +} + +static void mlx5_esw_fdb_active(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_fdb_drop_destroy(esw); + mlx5_mpfs_enable(esw->dev); + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + esw_debug(esw->dev, "Connecting vport %d to eswitch\n", + vport->vport); + mlx5_esw_adj_vport_modify(esw->dev, vport->vport, true); + } + + esw->offloads_inactive = false; + esw_warn(esw->dev, "MPFS/FDB active\n"); +} + +static void mlx5_esw_fdb_inactive(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_mpfs_disable(esw->dev); + mlx5_esw_fdb_drop_create(esw); + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + esw_debug(esw->dev, "Disconnecting vport %u from eswitch\n", + vport->vport); + + mlx5_esw_adj_vport_modify(esw->dev, vport->vport, false); + } + + esw->offloads_inactive = true; + esw_warn(esw->dev, "MPFS/FDB inactive\n"); +} + static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -3438,6 +3564,10 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { + mlx5_esw_fdb_drop_destroy(esw); + if (esw->fdb_table.offloads.drop_root_fc) + mlx5_fc_destroy(esw->dev, esw->fdb_table.offloads.drop_root_fc); + esw->fdb_table.offloads.drop_root_fc = NULL; esw_destroy_vport_rx_drop_rule(esw); esw_destroy_vport_rx_drop_group(esw); esw_destroy_vport_rx_group(esw); @@ -3600,6 +3730,11 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_steering_init; + if (esw->offloads_inactive) + mlx5_esw_fdb_inactive(esw); + else + mlx5_esw_fdb_active(esw); + /* Representor will control the vport link state */ 
mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; @@ -3666,6 +3801,9 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_metadata_uninit(esw); mlx5_rdma_disable_roce(esw->dev); mlx5_esw_adjacent_vhcas_cleanup(esw); + /* must be done after vhcas cleanup to avoid adjacent vports connect */ + if (esw->offloads_inactive) + mlx5_esw_fdb_active(esw); /* legacy mode always active */ mutex_destroy(&esw->offloads.termtbl_mutex); } @@ -3676,6 +3814,7 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) *mlx5_mode = MLX5_ESWITCH_LEGACY; break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: + case DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE: *mlx5_mode = MLX5_ESWITCH_OFFLOADS; break; default: @@ -3685,14 +3824,17 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) return 0; } -static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +static int esw_mode_to_devlink(struct mlx5_eswitch *esw, u16 *mode) { - switch (mlx5_mode) { + switch (esw->mode) { case MLX5_ESWITCH_LEGACY: *mode = DEVLINK_ESWITCH_MODE_LEGACY; break; case MLX5_ESWITCH_OFFLOADS: - *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + if (esw->offloads_inactive) + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE; + else + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; break; default: return -EINVAL; @@ -3798,6 +3940,45 @@ static bool mlx5_devlink_netdev_netns_immutable_set(struct devlink *devlink, return ret; } +/* Returns true when only changing between active and inactive switchdev mode */ +static bool mlx5_devlink_switchdev_active_mode_change(struct mlx5_eswitch *esw, + u16 devlink_mode) +{ + /* current mode is not switchdev */ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return false; + + /* new mode is not switchdev */ + if (devlink_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV && + devlink_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE) + return false; + + /* already inactive: no change in current state */ + if (devlink_mode == 
DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE && + esw->offloads_inactive) + return false; + + /* already active: no change in current state */ + if (devlink_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + !esw->offloads_inactive) + return false; + + down_write(&esw->mode_lock); + esw->offloads_inactive = !esw->offloads_inactive; + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + + if (esw->offloads_inactive) + mlx5_esw_fdb_inactive(esw); + else + mlx5_esw_fdb_active(esw); + + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; + up_write(&esw->mode_lock); + return true; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3812,12 +3993,16 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) { + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && mlx5_get_sd(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured."); return -EPERM; } + /* Avoid try_lock, active/inactive mode change is not restricted */ + if (mlx5_devlink_switchdev_active_mode_change(esw, mode)) + return 0; + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { @@ -3840,7 +4025,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, esw->eswitch_operation_in_progress = true; up_write(&esw->mode_lock); - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && !mlx5_devlink_netdev_netns_immutable_set(devlink, true)) { NL_SET_ERR_MSG_MOD(extack, "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); @@ -3848,25 +4033,27 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, goto skip; } - if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + if (mlx5_mode == 
MLX5_ESWITCH_LEGACY) esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_eswitch_disable_locked(esw); - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't change mode while devlink traps are active"); err = -EOPNOTSUPP; goto skip; } + esw->offloads_inactive = + (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE); err = esw_offloads_start(esw, extack); - } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { + } else if (mlx5_mode == MLX5_ESWITCH_LEGACY) { err = esw_offloads_stop(esw, extack); } else { err = -EINVAL; } skip: - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && err) + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS && err) mlx5_devlink_netdev_netns_immutable_set(devlink, false); down_write(&esw->mode_lock); esw->eswitch_operation_in_progress = false; @@ -3885,7 +4072,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - return esw_mode_to_devlink(esw->mode, mode); + return esw_mode_to_devlink(esw, mode); } static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2db3ffb0a2b28..2ca3bddbdf050 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3520,6 +3520,11 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (!steering->fdb_root_ns) return -ENOMEM; + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_DROP_ROOT, 1); + err = PTR_ERR_OR_ZERO(maj_prio); + if (err) + goto out_err; + err = create_fdb_bypass(steering); if (err) goto out_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 99fb7a53add08..4a88a42ae4f7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c 
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -167,7 +167,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) if (err) goto free_l2table_index; mlx5_core_dbg(dev, "MPFS entry %pM, set @index (%d)\n", - l2addr->node.addr, l2addr->index); + l2addr->node.addr, index); } l2addr->index = index; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6ac76a0c38277..7bf2449c53b20 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -116,6 +116,7 @@ enum mlx5_flow_namespace_type { }; enum { + FDB_DROP_ROOT, FDB_BYPASS_PATH, FDB_CRYPTO_INGRESS, FDB_TC_OFFLOAD, From 60e6489f8e3b086bd1130ad4450a2c112e863791 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 9 Nov 2025 02:52:22 +0000 Subject: [PATCH 752/867] af_unix: Initialise scc_index in unix_add_edge(). Quang Le reported that the AF_UNIX GC could garbage-collect a receive queue of an alive in-flight socket, with a nice repro. The repro consists of three stages. 1) 1-a. Create a single cyclic reference with many sockets 1-b. close() all sockets 1-c. Trigger GC 2) 2-a. Pass sk-A to an embryo sk-B 2-b. Pass sk-X to sk-X 2-c. Trigger GC 3) 3-a. accept() the embryo sk-B 3-b. Pass sk-B to sk-C 3-c. close() the in-flight sk-A 3-d. Trigger GC As of 2-c, sk-A and sk-X are linked to unix_unvisited_vertices, and unix_walk_scc() groups them into two different SCCs: unix_sk(sk-A)->vertex->scc_index = 2 (UNIX_VERTEX_INDEX_START) unix_sk(sk-X)->vertex->scc_index = 3 Once GC completes, unix_graph_grouped is set to true. Also, unix_graph_maybe_cyclic is set to true due to sk-X's cyclic self-reference, which makes close() trigger GC. At 3-b, unix_add_edge() allocates unix_sk(sk-B)->vertex and links it to unix_unvisited_vertices. unix_update_graph() is called at 3-a. and 3-b., but neither unix_graph_grouped nor unix_graph_maybe_cyclic is changed because both sk-B's listener and sk-C are not in-flight. 3-c decrements sk-A's file refcnt to 1. 
Since unix_graph_grouped is true at 3-d, unix_walk_scc_fast() is finally called and iterates 3 sockets sk-A, sk-B, and sk-X: sk-A -> sk-B (-> sk-C) sk-X -> sk-X This is totally fine. All of them are not yet close()d and should be grouped into different SCCs. However, unix_vertex_dead() misjudges that sk-A and sk-B are in the same SCC and sk-A is dead. unix_sk(sk-A)->scc_index == unix_sk(sk-B)->scc_index <-- Wrong! && sk-A's file refcnt == unix_sk(sk-A)->vertex->out_degree ^-- 1 in-flight count for sk-B -> sk-A is dead !? The problem is that unix_add_edge() does not initialise scc_index. Stage 1) is used for heap spraying, making a newly allocated vertex have vertex->scc_index == 2 (UNIX_VERTEX_INDEX_START) set by unix_walk_scc() at 1-c. Let's track the max SCC index from the previous unix_walk_scc() call and assign the max + 1 to a new vertex's scc_index. This way, we can continue to avoid Tarjan's algorithm while preventing misjudgments. Fixes: ad081928a8b0 ("af_unix: Avoid Tarjan's algorithm if unnecessary.") Reported-by: Quang Le Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251109025233.3659187-1-kuniyu@google.com Signed-off-by: Paolo Abeni --- net/unix/garbage.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 684ab03137b6c..65396a4e1b07e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -145,6 +145,7 @@ enum unix_vertex_index { }; static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1; +static unsigned long unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) { @@ -153,6 +154,7 @@ static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) if (!vertex) { vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry); vertex->index = unix_vertex_unvisited_index; + vertex->scc_index = ++unix_vertex_max_scc_index; vertex->out_degree = 0; 
INIT_LIST_HEAD(&vertex->edges); INIT_LIST_HEAD(&vertex->scc_entry); @@ -489,10 +491,15 @@ static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_inde scc_dead = unix_vertex_dead(v); } - if (scc_dead) + if (scc_dead) { unix_collect_skb(&scc, hitlist); - else if (!unix_graph_maybe_cyclic) - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } else { + if (unix_vertex_max_scc_index < vertex->scc_index) + unix_vertex_max_scc_index = vertex->scc_index; + + if (!unix_graph_maybe_cyclic) + unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } list_del(&scc); } @@ -507,6 +514,7 @@ static void unix_walk_scc(struct sk_buff_head *hitlist) unsigned long last_index = UNIX_VERTEX_INDEX_START; unix_graph_maybe_cyclic = false; + unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; /* Visit every vertex exactly once. * __unix_walk_scc() moves visited vertices to unix_visited_vertices. From 62b656e43eaeae445a39cd8021a4f47065af4389 Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 9 Nov 2025 14:43:35 +0530 Subject: [PATCH 753/867] net: sched: act_connmark: initialize struct tc_ife to fix kernel leak In tcf_connmark_dump(), the variable 'opt' was partially initialized using a designated initializer, while the padding bytes remained uninitialized. Since nla_put() copies the entire structure into a netlink message, these uninitialized bytes leaked to userspace. Initialize the structure with memset before assigning its fields to ensure all members and padding are cleared prior to being copied. 
Reported-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c85cae3350b7d486aee Tested-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action") Signed-off-by: Ranganath V N Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251109091336.9277-2-vnranganath.20@gmail.com Acked-by: Cong Wang Signed-off-by: Paolo Abeni --- net/sched/act_connmark.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 3e89927d71164..26ba8c2d20abf 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -195,13 +195,15 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, const struct tcf_connmark_info *ci = to_connmark(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_connmark_parms *parms; - struct tc_connmark opt = { - .index = ci->tcf_index, - .refcnt = refcount_read(&ci->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, - }; + struct tc_connmark opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ci->tcf_index; + opt.refcnt = refcount_read(&ci->tcf_refcnt) - ref; + opt.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind; + rcu_read_lock(); parms = rcu_dereference(ci->parms); From ce50039be49eea9b4cd8873ca6eccded1b4a130a Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 9 Nov 2025 14:43:36 +0530 Subject: [PATCH 754/867] net: sched: act_ife: initialize struct tc_ife to fix KMSAN kernel-infoleak Fix a KMSAN kernel-infoleak detected by syzbot. [net?] KMSAN: kernel-infoleak in __skb_datagram_iter In tcf_ife_dump(), the variable 'opt' was partially initialized using a designated initializer, while the padding bytes remained uninitialized. Since nla_put() copies the entire structure into a netlink message, these uninitialized bytes leaked to userspace. 
Initialize the structure with memset before assigning its fields to ensure all members and padding are cleared prior to being copied. This change silences the KMSAN report and prevents potential information leaks from the kernel memory. This fix has been tested and validated by syzbot. This patch closes the bug reported at the following syzkaller link and ensures no infoleak. Reported-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c85cae3350b7d486aee Tested-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Fixes: ef6980b6becb ("introduce IFE action") Signed-off-by: Ranganath V N Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251109091336.9277-3-vnranganath.20@gmail.com Acked-by: Cong Wang Signed-off-by: Paolo Abeni --- net/sched/act_ife.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 107c6d83dc5c4..7c6975632fc2e 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -644,13 +644,15 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; - struct tc_ife opt = { - .index = ife->tcf_index, - .refcnt = refcount_read(&ife->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, - }; + struct tc_ife opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ife->tcf_index, + opt.refcnt = refcount_read(&ife->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, + spin_lock_bh(&ife->tcf_lock); opt.action = ife->tcf_action; p = rcu_dereference_protected(ife->params, From 0bcd5b3b50cc1fcbf775479322cc37c15d35a489 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 9 Nov 2025 11:37:49 +0200 Subject: [PATCH 755/867] net/mlx5e: Fix missing error assignment in mlx5e_xfrm_add_state() Assign the return value of mlx5_eswitch_block_mode() to 
'err' before checking it to avoid returning an uninitialized error code. Fixes: 22239eb258bc ("net/mlx5e: Prevent tunnel reformat when tunnel mode not allowed") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202510271649.uwsIxD6O-lkp@intel.com/ Closes: http://lore.kernel.org/linux-rdma/aPIEK4rLB586FdDt@stanley.mountain/ Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 0a4fb8c922684..35d9530037a65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -804,7 +804,8 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, goto err_xfrm; } - if (mlx5_eswitch_block_mode(priv->mdev)) + err = mlx5_eswitch_block_mode(priv->mdev); + if (err) goto unblock_ipsec; if (x->props.mode == XFRM_MODE_TUNNEL && From 2dc768c05217e667f987907a3404926e7ba89ff3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Sun, 9 Nov 2025 11:37:50 +0200 Subject: [PATCH 756/867] net/mlx5e: Trim the length of the num_doorbell error When trying to set num_doorbells to a value greater than the max number of channels, the error message was going over the netlink limit of 80 chars, truncating the most important part of the message, the number of channels. Fix that by trimming the length a bit. 
Fixes: 11bbcfb7668c ("net/mlx5e: Use the 'num_doorbells' devlink param") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-3-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index fceea83abbd76..887adf4807d16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -541,7 +541,7 @@ static int mlx5_devlink_num_doorbells_validate(struct devlink *devlink, u32 id, max_num_channels = mlx5e_get_max_num_channels(mdev); if (val32 > max_num_channels) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Requested num_doorbells (%u) exceeds maximum number of channels (%u)", + "Requested num_doorbells (%u) exceeds max number of channels (%u)", val32, max_num_channels); return -EINVAL; } From a7bf4d5063c7837096aab2853224eb23628514d9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 9 Nov 2025 11:37:51 +0200 Subject: [PATCH 757/867] net/mlx5e: Fix maxrate wraparound in threshold between units The previous calculation used roundup() which caused an overflow for rates between 25.5Gbps and 26Gbps. For example, a rate of 25.6Gbps would result in using 100Mbps units with value of 256, which would overflow the 8 bits field. Simplify the upper_limit_mbps calculation by removing the unnecessary roundup, and adjust the comparison to use <= to correctly handle the boundary condition. 
Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-4-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d166c0d5189e1..3456144710528 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -595,18 +595,19 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, struct mlx5_core_dev *mdev = priv->mdev; u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; - __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + __u64 upper_limit_mbps; int i; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); + upper_limit_mbps = 255 * MLX5E_100MB; for (i = 0; i <= mlx5_max_tc(mdev); i++) { if (!maxrate->tc_maxrate[i]) { max_bw_unit[i] = MLX5_BW_NO_LIMIT; continue; } - if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + if (maxrate->tc_maxrate[i] <= upper_limit_mbps) { max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], MLX5E_100MB); max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; From 43b27d1bd88a4bce34ec2437d103acfae9655f9e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 9 Nov 2025 11:37:52 +0200 Subject: [PATCH 758/867] net/mlx5e: Fix wraparound in rate limiting for values above 255 Gbps Add validation to reject rates exceeding 255 Gbps that would overflow the 8 bits max bandwidth field. 
Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-5-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 3456144710528..d88a48210fdcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -596,11 +596,13 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; __u64 upper_limit_mbps; + __u64 upper_limit_gbps; int i; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); upper_limit_mbps = 255 * MLX5E_100MB; + upper_limit_gbps = 255 * MLX5E_1GB; for (i = 0; i <= mlx5_max_tc(mdev); i++) { if (!maxrate->tc_maxrate[i]) { @@ -612,10 +614,16 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, MLX5E_100MB); max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; max_bw_unit[i] = MLX5_100_MBPS_UNIT; - } else { + } else if (max_bw_value[i] <= upper_limit_gbps) { max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], MLX5E_1GB); max_bw_unit[i] = MLX5_GBPS_UNIT; + } else { + netdev_err(netdev, + "tc_%d maxrate %llu Kbps exceeds limit %llu\n", + i, maxrate->tc_maxrate[i], + upper_limit_gbps); + return -EINVAL; } } From 9fcc2b6c10523f7e75db6387946c86fcf19dc97e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 9 Nov 2025 11:37:53 +0200 Subject: [PATCH 759/867] net/mlx5e: Fix potentially misleading debug message Change the debug message to print the correct units instead of always assuming Gbps, as the value can be in either 100 Mbps or 1 Gbps units. 
Fixes: 5da8bc3effb6 ("net/mlx5e: DCBNL, Add debug messages log") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-6-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d88a48210fdcb..9b93da4d52f64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -598,6 +598,19 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, __u64 upper_limit_mbps; __u64 upper_limit_gbps; int i; + struct { + int scale; + const char *units_str; + } units[] = { + [MLX5_100_MBPS_UNIT] = { + .scale = 100, + .units_str = "Mbps", + }, + [MLX5_GBPS_UNIT] = { + .scale = 1, + .units_str = "Gbps", + }, + }; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); @@ -628,8 +641,9 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", - __func__, i, max_bw_value[i]); + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %u %s\n", __func__, i, + max_bw_value[i] * units[max_bw_unit[i]].scale, + units[max_bw_unit[i]].units_str); } return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); From e5eba42f01340f73888dfe560be2806057c25913 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Sun, 9 Nov 2025 11:49:03 +0200 Subject: [PATCH 760/867] mlx5: Fix default values in create CQ Currently, CQs without a completion function are assigned the mlx5_add_cq_to_tasklet function by default. This is problematic since only user CQs created through the mlx5_ib driver are intended to use this function. 
Additionally, all CQs that will use doorbells instead of polling for completions must call mlx5_cq_arm. However, the default CQ creation flow leaves a valid value in the CQ's arm_db field, allowing FW to send interrupts to polling-only CQs in certain corner cases. These two factors would allow a polling-only kernel CQ to be triggered by an EQ interrupt and call a completion function intended only for user CQs, causing a null pointer exception. Some areas in the driver have prevented this issue with one-off fixes but did not address the root cause. This patch fixes the described issue by adding defaults to the create CQ flow. It adds a default dummy completion function to protect against null pointer exceptions, and it sets an invalid command sequence number by default in kernel CQs to prevent the FW from sending an interrupt to the CQ until it is armed. User CQs are responsible for their own initialization values. Callers of mlx5_core_create_cq are responsible for changing the completion function and arming the CQ per their needs. 
Fixes: cdd04f4d4d71 ("net/mlx5: Add support to create SQ and CQ for ASO") Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Acked-by: Leon Romanovsky Link: https://patch.msgid.link/1762681743-1084694-1-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/infiniband/hw/mlx5/cq.c | 11 +++++--- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 23 +++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 1 - .../ethernet/mellanox/mlx5/core/fpga/conn.c | 15 +++++----- .../mellanox/mlx5/core/steering/hws/send.c | 7 ----- .../mellanox/mlx5/core/steering/sws/dr_send.c | 28 +++++-------------- drivers/vdpa/mlx5/net/mlx5_vnet.c | 6 ++-- include/linux/mlx5/cq.h | 1 + 8 files changed, 44 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a23b364e24ffe..651d76bca114d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1020,15 +1020,18 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); + if (udata) { + cq->mcq.comp = mlx5_add_cq_to_tasklet; + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; + } else { + cq->mcq.comp = mlx5_ib_cq_comp; + } + err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); if (err) goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - if (udata) - cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; - else - cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; INIT_LIST_HEAD(&cq->wc_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index e9f319a9bdd6b..60f7ab1d72e78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -66,8 +66,8 @@ void mlx5_cq_tasklet_cb(struct tasklet_struct *t) tasklet_schedule(&ctx->task); } -static void 
mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, - struct mlx5_eqe *eqe) +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -95,7 +95,15 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, if (schedule_tasklet) tasklet_schedule(&tasklet_ctx->task); } +EXPORT_SYMBOL(mlx5_add_cq_to_tasklet); +static void mlx5_core_cq_dummy_cb(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) +{ + mlx5_core_err(cq->eq->core.dev, + "CQ default completion callback, CQ #%u\n", cq->cqn); +} + +#define MLX5_CQ_INIT_CMD_SN cpu_to_be32(2 << 28) /* Callers must verify outbox status in case of err */ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen) @@ -121,10 +129,19 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->arm_sn = 0; cq->eq = eq; cq->uid = MLX5_GET(create_cq_in, in, uid); + + /* Kernel CQs must set the arm_db address prior to calling + * this function, allowing for the proper value to be + * initialized. User CQs are responsible for their own + * initialization since they do not use the arm_db field. 
+ */ + if (cq->arm_db) + *cq->arm_db = MLX5_CQ_INIT_CMD_SN; + refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) - cq->comp = mlx5_add_cq_to_tasklet; + cq->comp = mlx5_core_cq_dummy_cb; /* assuming CQ will be deleted before the EQ */ cq->tasklet_ctx.priv = &eq->tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6023bbbf3f39c..5e17eae81f4b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2219,7 +2219,6 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; - *mcq->arm_db = 0; mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index cb1319974f83f..ccef64fb40b66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -421,6 +421,13 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) __be64 *pas; u32 i; + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + cq_size = roundup_pow_of_two(cq_size); MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); @@ -468,15 +475,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) if (err) goto err_cqwq; - conn->cq.mcq.cqe_sz = 64; - conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; - conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; - *conn->cq.mcq.set_ci_db = 0; - *conn->cq.mcq.arm_db = 0; - conn->cq.mcq.vector = 0; - conn->cq.mcq.comp = 
mlx5_fpga_conn_cq_complete; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); - mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 24ef7d66fa8aa..7510c46e58a57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -873,12 +873,6 @@ static int hws_send_ring_open_sq(struct mlx5hws_context *ctx, return err; } -static void hws_cq_complete(struct mlx5_core_cq *mcq, - struct mlx5_eqe *eqe) -{ - pr_err("CQ completion CQ: #%u\n", mcq->cqn); -} - static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, int numa_node, struct mlx5hws_send_engine *queue, @@ -901,7 +895,6 @@ static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; - mcq->comp = hws_cq_complete; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { cqe = mlx5_cqwq_get_wqe(&cq->wq, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index 077a77fde670e..d034372fa0476 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1049,12 +1049,6 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) return 0; } -static void dr_cq_complete(struct mlx5_core_cq *mcq, - struct mlx5_eqe *eqe) -{ - pr_err("CQ completion CQ: #%u\n", mcq->cqn); -} - static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_uars_page *uar, size_t ncqe) @@ -1089,6 +1083,13 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; } + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = 
cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + cq->mcq.vector = 0; + cq->mdev = mdev; + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = kvzalloc(inlen, GFP_KERNEL); @@ -1112,27 +1113,12 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); - cq->mcq.comp = dr_cq_complete; - err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) goto err_cqwq; - cq->mcq.cqe_sz = 64; - cq->mcq.set_ci_db = cq->wq_ctrl.db.db; - cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; - *cq->mcq.set_ci_db = 0; - - /* set no-zero value, in order to avoid the HW to run db-recovery on - * CQ that used in polling mode. - */ - *cq->mcq.arm_db = cpu_to_be32(2 << 28); - - cq->mcq.vector = 0; - cq->mdev = mdev; - return cq; err_cqwq: diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 82034efb74fc7..a7936bd1aabe1 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -573,6 +573,8 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) vcq->mcq.set_ci_db = vcq->db.db; vcq->mcq.arm_db = vcq->db.db + 1; vcq->mcq.cqe_sz = 64; + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); if (err) @@ -612,10 +614,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) if (err) goto err_vec; - vcq->mcq.comp = mlx5_vdpa_cq_comp; - vcq->cqe = num_ent; - vcq->mcq.set_ci_db = vcq->db.db; - vcq->mcq.arm_db = vcq->db.db + 1; mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); kfree(in); return 0; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7ef2c7c7d803d..9d47cdc727ad0 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) 
complete(&cq->free); } +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe); int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, From 3081891b76ca909e6a97830e0ca09e060c4d992e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 9 Nov 2025 16:12:15 +0000 Subject: [PATCH 761/867] net_sched: limit try_bulk_dequeue_skb() batches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 100dfa74cad9 ("inet: dev_queue_xmit() llist adoption") I started seeing many qdisc requeues on IDPF under high TX workload. $ tc -s qd sh dev eth1 handle 1: ; sleep 1; tc -s qd sh dev eth1 handle 1: qdisc mq 1: root Sent 43534617319319 bytes 268186451819 pkt (dropped 0, overlimits 0 requeues 3532840114) backlog 1056Kb 6675p requeues 3532840114 qdisc mq 1: root Sent 43554665866695 bytes 268309964788 pkt (dropped 0, overlimits 0 requeues 3537737653) backlog 781164b 4822p requeues 3537737653 This is caused by try_bulk_dequeue_skb() being only limited by BQL budget. perf record -C120-239 -e qdisc:qdisc_dequeue sleep 1 ; perf script ... 
netperf 75332 [146] 2711.138269: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1292 skbaddr=0xff378005a1e9f200 netperf 75332 [146] 2711.138953: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1213 skbaddr=0xff378004d607a500 netperf 75330 [144] 2711.139631: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1233 skbaddr=0xff3780046be20100 netperf 75333 [147] 2711.140356: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1093 skbaddr=0xff37800514845b00 netperf 75337 [151] 2711.141037: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1353 skbaddr=0xff37800460753300 netperf 75337 [151] 2711.141877: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1367 skbaddr=0xff378004e72c7b00 netperf 75330 [144] 2711.142643: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1202 skbaddr=0xff3780045bd60000 ... This is bad because: 1) Large batches hold one victim cpu for a very long time. 2) Drivers often hit their own TX ring limit (all slots are used). 3) We call dev_requeue_skb() 4) Requeues are using a FIFO (q->gso_skb), breaking the qdisc's ability to implement FQ or priority scheduling. 5) dequeue_skb() gets packets from q->gso_skb one skb at a time with no xmit_more support. This is causing many spinlock games between the qdisc and the device driver. Requeues were supposed to be very rare, let's keep them this way. Limit batch sizes to /proc/sys/net/core/dev_weight (default 64) as __qdisc_run() was designed to use. 
Fixes: 5772e9a3463b ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE") Signed-off-by: Eric Dumazet Cc: Jesper Dangaard Brouer Cc: Toke Høiland-Jørgensen Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Signed-off-by: NipaLocal --- net/sched/sch_generic.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9a98d02a55fc..852e603c17551 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -180,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, const struct netdev_queue *txq, - int *packets) + int *packets, int budget) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; + int cnt = 0; while (bytelimit > 0) { struct sk_buff *nskb = q->dequeue(q); @@ -193,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; - (*packets)++; /* GSO counts as one pkt */ + if (++cnt >= budget) + break; } + (*packets) += cnt; skb_mark_not_on_list(skb); } @@ -228,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, * A requeued skb (via q->gso_skb) can also be a SKB list. */ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, - int *packets) + int *packets, int budget) { const struct netdev_queue *txq = q->dev_queue; struct sk_buff *skb = NULL; @@ -295,7 +298,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, if (skb) { bulk: if (qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq, packets); + try_bulk_dequeue_skb(q, skb, txq, packets, budget); else try_bulk_dequeue_skb_slow(q, skb, packets); } @@ -387,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. 
* */ -static inline bool qdisc_restart(struct Qdisc *q, int *packets) +static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget) { spinlock_t *root_lock = NULL; struct netdev_queue *txq; @@ -396,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) bool validate; /* Dequeue packet */ - skb = dequeue_skb(q, &validate, packets); + skb = dequeue_skb(q, &validate, packets, budget); if (unlikely(!skb)) return false; @@ -414,7 +417,7 @@ void __qdisc_run(struct Qdisc *q) int quota = READ_ONCE(net_hotdata.dev_tx_weight); int packets; - while (qdisc_restart(q, &packets)) { + while (qdisc_restart(q, &packets, quota)) { quota -= packets; if (quota <= 0) { if (q->flags & TCQ_F_NOLOCK) From 9289e0ad966e4262c5616b04db8d1b54100fb9c5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:51 +0200 Subject: [PATCH 762/867] net: ethernet: ti: am65-cpsw: fix BPF Program change on multi-port CPSW On a multi-port CPSW system, stopping and starting just one port (ndev) will not restart the queues if other ports (ndevs) are open. Instead, check the usage_count variable to know if CPSW is running and if so restart all the queues. 
Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 25 +++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index d5f358ec98205..f8beb1735fb9c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1919,18 +1919,33 @@ static int am65_cpsw_xdp_prog_setup(struct net_device *ndev, struct bpf_prog *prog) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); - bool running = netif_running(ndev); + struct am65_cpsw_common *common = port->common; + bool running = !!port->common->usage_count; struct bpf_prog *old_prog; + int ret; - if (running) - am65_cpsw_nuss_ndo_slave_stop(ndev); + if (running) { + /* stop all queues */ + am65_cpsw_destroy_txqs(common); + am65_cpsw_destroy_rxqs(common); + } old_prog = xchg(&port->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); - if (running) - return am65_cpsw_nuss_ndo_slave_open(ndev); + if (running) { + /* start all queues */ + ret = am65_cpsw_create_rxqs(common); + if (ret) + return ret; + + ret = am65_cpsw_create_txqs(common); + if (ret) { + am65_cpsw_destroy_rxqs(common); + return ret; + } + } return 0; } From 04647d9798a96dd8980a49351439b6a547bcdc43 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:52 +0200 Subject: [PATCH 763/867] net: ethernet: ti: am65-cpsw: Retain page_pool on XDP program exchange Add a new 'retain_page_pool' flag to am65_cpsw_destroy_rxq/s() so that the page pool allocation is retained while switching XDP program. This will avoid requiring any re-allocation and potential failures during low memory conditions. 
Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 38 ++++++++++++++---------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f8beb1735fb9c..f9e2286efa29b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -505,7 +505,7 @@ static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow, static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma); static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma); -static void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id) +static void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id, bool retain_page_pool) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct am65_cpsw_rx_flow *flow; @@ -528,13 +528,13 @@ static void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id) xdp_rxq_info_unreg(rxq); } - if (flow->page_pool) { + if (flow->page_pool && !retain_page_pool) { page_pool_destroy(flow->page_pool); flow->page_pool = NULL; } } -static void am65_cpsw_destroy_rxqs(struct am65_cpsw_common *common) +static void am65_cpsw_destroy_rxqs(struct am65_cpsw_common *common, bool retain_page_pool) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; int id; @@ -549,7 +549,7 @@ static void am65_cpsw_destroy_rxqs(struct am65_cpsw_common *common) } for (id = common->rx_ch_num_flows - 1; id >= 0; id--) - am65_cpsw_destroy_rxq(common, id); + am65_cpsw_destroy_rxq(common, id, retain_page_pool); k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn); } @@ -574,13 +574,18 @@ static int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) flow = &rx_chn->flows[id]; pp_params.napi = &flow->napi_rx; - pool = page_pool_create(&pp_params); - if (IS_ERR(pool)) { - ret = PTR_ERR(pool); - return ret; - } - flow->page_pool = pool; + if (!flow->page_pool) { + pool = 
page_pool_create(&pp_params); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + return ret; + } + + flow->page_pool = pool; + } else { + pool = flow->page_pool; + } /* using same page pool is allowed as no running rx handlers * simultaneously for both ndevs @@ -626,7 +631,7 @@ static int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) return 0; err: - am65_cpsw_destroy_rxq(common, id); + am65_cpsw_destroy_rxq(common, id, false); return ret; } @@ -653,7 +658,7 @@ static int am65_cpsw_create_rxqs(struct am65_cpsw_common *common) err: for (--id; id >= 0; id--) - am65_cpsw_destroy_rxq(common, id); + am65_cpsw_destroy_rxq(common, id, false); return ret; } @@ -942,7 +947,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) return 0; cleanup_rx: - am65_cpsw_destroy_rxqs(common); + am65_cpsw_destroy_rxqs(common, false); return ret; } @@ -956,7 +961,7 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); am65_cpsw_destroy_txqs(common); - am65_cpsw_destroy_rxqs(common); + am65_cpsw_destroy_rxqs(common, false); cpsw_ale_stop(common->ale); writel(0, common->cpsw_base + AM65_CPSW_REG_CTL); @@ -1927,7 +1932,8 @@ static int am65_cpsw_xdp_prog_setup(struct net_device *ndev, if (running) { /* stop all queues */ am65_cpsw_destroy_txqs(common); - am65_cpsw_destroy_rxqs(common); + /* Retain page pool */ + am65_cpsw_destroy_rxqs(common, true); } old_prog = xchg(&port->xdp_prog, prog); @@ -1942,7 +1948,7 @@ static int am65_cpsw_xdp_prog_setup(struct net_device *ndev, ret = am65_cpsw_create_txqs(common); if (ret) { - am65_cpsw_destroy_rxqs(common); + am65_cpsw_destroy_rxqs(common, false); return ret; } } From 7134aff3f5b965a2d6f538c8674fb6678dabcdd5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:53 +0200 Subject: [PATCH 764/867] net: ethernet: ti: am65-cpsw: add XSK pool helpers To prepare for XSK zero copy support, add XSK pool helpers in a new file am65-cpsw-xdp.c As 
queues are shared between ports we can no longer support the case where zero copy (XSK Pool) is enabled for the queue on one port but not for other ports. Current solution is to drop the packet if Zero copy is not enabled for that port + queue but enabled for some other port + same queue. xdp_zc_queues bitmap tracks if queue is setup as XSK pool and xsk_port_id array tracks which port the XSK queue is assigned to for zero copy. Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/Makefile | 2 +- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 21 ++-- drivers/net/ethernet/ti/am65-cpsw-nuss.h | 20 ++++ drivers/net/ethernet/ti/am65-cpsw-xdp.c | 122 +++++++++++++++++++++++ 4 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 drivers/net/ethernet/ti/am65-cpsw-xdp.c diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 93c0a4d0e33a6..96585a28fc7d7 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -29,7 +29,7 @@ keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale. 
obj-$(CONFIG_TI_K3_CPPI_DESC_POOL) += k3-cppi-desc-pool.o obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o -ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o +ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o am65-cpsw-xdp.o ti-am65-cpsw-nuss-$(CONFIG_TI_AM65_CPSW_QOS) += am65-cpsw-qos.o ti-am65-cpsw-nuss-$(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV) += am65-cpsw-switchdev.o obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f9e2286efa29b..46523be93df27 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -58,9 +58,6 @@ #define AM65_CPSW_MAX_PORTS 8 -#define AM65_CPSW_MIN_PACKET_SIZE VLAN_ETH_ZLEN -#define AM65_CPSW_MAX_PACKET_SIZE 2024 - #define AM65_CPSW_REG_CTL 0x004 #define AM65_CPSW_REG_STAT_PORT_EN 0x014 #define AM65_CPSW_REG_PTYPE 0x018 @@ -505,7 +502,7 @@ static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow, static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma); static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma); -static void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id, bool retain_page_pool) +void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id, bool retain_page_pool) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct am65_cpsw_rx_flow *flow; @@ -554,7 +551,7 @@ static void am65_cpsw_destroy_rxqs(struct am65_cpsw_common *common, bool retain_ k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn); } -static int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) +int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct page_pool_params pp_params = { @@ -663,7 +660,7 @@ static int am65_cpsw_create_rxqs(struct am65_cpsw_common *common) return ret; } -static void 
am65_cpsw_destroy_txq(struct am65_cpsw_common *common, int id) +void am65_cpsw_destroy_txq(struct am65_cpsw_common *common, int id) { struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[id]; @@ -697,7 +694,7 @@ static void am65_cpsw_destroy_txqs(struct am65_cpsw_common *common) am65_cpsw_destroy_txq(common, id); } -static int am65_cpsw_create_txq(struct am65_cpsw_common *common, int id) +int am65_cpsw_create_txq(struct am65_cpsw_common *common, int id) { struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[id]; int ret; @@ -1327,7 +1324,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - if (port->xdp_prog) { + if (am65_cpsw_xdp_is_enabled(port)) { xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, pkt_len, false); @@ -1961,6 +1958,9 @@ static int am65_cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) switch (bpf->command) { case XDP_SETUP_PROG: return am65_cpsw_xdp_prog_setup(ndev, bpf->prog); + case XDP_SETUP_XSK_POOL: + return am65_cpsw_xsk_setup_pool(ndev, bpf->xsk.pool, + bpf->xsk.queue_id); default: return -EINVAL; } @@ -3553,7 +3553,12 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL); if (!common) return -ENOMEM; + common->dev = dev; + common->xdp_zc_queues = devm_bitmap_zalloc(dev, AM65_CPSW_MAX_QUEUES, + GFP_KERNEL); + if (!common->xdp_zc_queues) + return -ENOMEM; of_id = of_match_device(am65_cpsw_nuss_of_mtable, dev); if (!of_id) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 917c37e4e89bd..31789b5e5e1fc 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -23,8 +23,14 @@ struct am65_cpts; #define AM65_CPSW_MAX_QUEUES 8 /* both TX & RX */ +#define 
AM65_CPSW_MIN_PACKET_SIZE VLAN_ETH_ZLEN +#define AM65_CPSW_MAX_PACKET_SIZE 2024 + #define AM65_CPSW_PORT_VLAN_REG_OFFSET 0x014 +#define AM65_CPSW_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC |\ + DMA_ATTR_WEAK_ORDERING) + struct am65_cpsw_slave_data { bool mac_only; struct cpsw_sl *mac_sl; @@ -190,6 +196,9 @@ struct am65_cpsw_common { unsigned char switch_id[MAX_PHYS_ITEM_ID_LEN]; /* only for suspend/resume context restore */ u32 *ale_context; + /* XDP Zero Copy */ + unsigned long *xdp_zc_queues; + int xsk_port_id[AM65_CPSW_MAX_QUEUES]; }; struct am65_cpsw_ndev_priv { @@ -228,4 +237,15 @@ int am65_cpsw_nuss_update_tx_rx_chns(struct am65_cpsw_common *common, bool am65_cpsw_port_dev_check(const struct net_device *dev); +int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id); +void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id, bool retain_page_pool); +int am65_cpsw_create_txq(struct am65_cpsw_common *common, int id); +void am65_cpsw_destroy_txq(struct am65_cpsw_common *common, int id); +int am65_cpsw_xsk_setup_pool(struct net_device *ndev, + struct xsk_buff_pool *pool, u16 qid); +int am65_cpsw_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags); +static inline bool am65_cpsw_xdp_is_enabled(struct am65_cpsw_port *port) +{ + return !!READ_ONCE(port->xdp_prog); +} #endif /* AM65_CPSW_NUSS_H_ */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-xdp.c b/drivers/net/ethernet/ti/am65-cpsw-xdp.c new file mode 100644 index 0000000000000..89f43f7c83db3 --- /dev/null +++ b/drivers/net/ethernet/ti/am65-cpsw-xdp.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Texas Instruments K3 AM65 Ethernet Switch SubSystem Driver + * + * Copyright (C) 2025 Texas Instruments Incorporated - http://www.ti.com/ + * + */ + +#include +#include +#include "am65-cpsw-nuss.h" + +static int am65_cpsw_xsk_pool_enable(struct am65_cpsw_port *port, + struct xsk_buff_pool *pool, u16 qid) +{ + struct am65_cpsw_common *common = port->common; + struct am65_cpsw_rx_chn *rx_chn; + 
bool need_update; + u32 frame_size; + int ret; + + /* + * As queues are shared between ports we can no longer + * support the case where zero copy (XSK Pool) is enabled + * for the queue on one port but not for other ports. + * + * Current solution is to drop the packet if Zero copy + * is not enabled for that port + queue but enabled for + * some other port + same queue. + */ + if (test_bit(qid, common->xdp_zc_queues)) + return -EINVAL; + + rx_chn = &common->rx_chns; + if (qid >= common->rx_ch_num_flows || qid >= common->tx_ch_num) + return -EINVAL; + + frame_size = xsk_pool_get_rx_frame_size(pool); + if (frame_size < AM65_CPSW_MAX_PACKET_SIZE) + return -EOPNOTSUPP; + + ret = xsk_pool_dma_map(pool, rx_chn->dma_dev, AM65_CPSW_RX_DMA_ATTR); + if (ret) { + netdev_err(port->ndev, "Failed to map xsk pool\n"); + return ret; + } + + need_update = common->usage_count && + am65_cpsw_xdp_is_enabled(port); + if (need_update) { + am65_cpsw_destroy_rxq(common, qid, true); + am65_cpsw_destroy_txq(common, qid); + } + + set_bit(qid, common->xdp_zc_queues); + common->xsk_port_id[qid] = port->port_id; + if (need_update) { + am65_cpsw_create_rxq(common, qid); + am65_cpsw_create_txq(common, qid); + } + + return 0; +} + +static int am65_cpsw_xsk_pool_disable(struct am65_cpsw_port *port, + struct xsk_buff_pool *pool, u16 qid) +{ + struct am65_cpsw_common *common = port->common; + bool need_update; + + if (qid >= common->rx_ch_num_flows || qid >= common->tx_ch_num) + return -EINVAL; + + if (!test_bit(qid, common->xdp_zc_queues)) + return -EINVAL; + + pool = xsk_get_pool_from_qid(port->ndev, qid); + if (!pool) + return -EINVAL; + + need_update = common->usage_count && am65_cpsw_xdp_is_enabled(port); + if (need_update) { + am65_cpsw_destroy_rxq(common, qid, true); + am65_cpsw_destroy_txq(common, qid); + synchronize_rcu(); + } + + xsk_pool_dma_unmap(pool, AM65_CPSW_RX_DMA_ATTR); + clear_bit(qid, common->xdp_zc_queues); + common->xsk_port_id[qid] = -EINVAL; + if (need_update) { + 
am65_cpsw_create_rxq(common, qid); + am65_cpsw_create_txq(common, qid); + } + + return 0; +} + +int am65_cpsw_xsk_setup_pool(struct net_device *ndev, + struct xsk_buff_pool *pool, u16 qid) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + return pool ? am65_cpsw_xsk_pool_enable(port, pool, qid) : + am65_cpsw_xsk_pool_disable(port, pool, qid); +} + +int am65_cpsw_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + if (!netif_running(ndev) || !netif_carrier_ok(ndev)) + return -ENETDOWN; + + if (!am65_cpsw_xdp_is_enabled(port)) + return -EINVAL; + + if (qid >= common->rx_ch_num_flows || qid >= common->tx_ch_num) + return -EINVAL; + + return 0; +} From bc8d03ff260f8bece520525f38e2b162af2ac0d4 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:54 +0200 Subject: [PATCH 765/867] net: ethernet: ti: am65-cpsw: Add AF_XDP zero copy for RX Add zero copy support to RX path. Introduce xsk_pool and xsk_port_id to struct am65_cpsw_rx_flow. This way we can quickly check if the flow is setup as XSK pool and for which port. If the RX flow is setup as XSK pool then register it as MEM_TYPE_XSK_BUFF_POOL. At queue creation get free frames from the XSK pool and push it to the RX ring. 
Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 317 ++++++++++++++++++++--- drivers/net/ethernet/ti/am65-cpsw-nuss.h | 12 +- drivers/net/ethernet/ti/am65-cpsw-xdp.c | 24 ++ 3 files changed, 319 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 46523be93df27..afc0c8836fe24 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -429,6 +429,55 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, } } +static int am65_cpsw_nuss_rx_push_zc(struct am65_cpsw_rx_flow *flow, + struct xdp_buff *xdp) +{ + struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns; + struct cppi5_host_desc_t *desc_rx; + struct am65_cpsw_swdata *swdata; + u32 flow_id = flow->id; + dma_addr_t desc_dma; + dma_addr_t buf_dma; + int buf_len; + + desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); + if (!desc_rx) + return -ENOMEM; + + desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); + buf_dma = xsk_buff_xdp_get_dma(xdp); + cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, + AM65_CPSW_NAV_PS_DATA_SIZE); + k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); + buf_len = xsk_pool_get_rx_frame_size(flow->xsk_pool); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, buf_len, buf_dma, buf_len); + swdata = cppi5_hdesc_get_swdata(desc_rx); + swdata->xdp = xdp; + swdata->flow_id = flow_id; + + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, flow_id, + desc_rx, desc_dma); +} + +static int am65_cpsw_nuss_rx_alloc_zc(struct am65_cpsw_rx_flow *flow, + int budget) +{ + struct xdp_buff *xdp; + int i, ret; + + for (i = 0; i < budget; i++) { + xdp = xsk_buff_alloc(flow->xsk_pool); + if (!xdp) + break; + + ret = am65_cpsw_nuss_rx_push_zc(flow, xdp); + if (ret < 0) + break; + } + + return i; +} + static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, struct page *page, u32 
flow_idx) { @@ -529,6 +578,9 @@ void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id, bool retain_ page_pool_destroy(flow->page_pool); flow->page_pool = NULL; } + + flow->xsk_pool = NULL; + flow->xsk_port_id = -EINVAL; } static void am65_cpsw_destroy_rxqs(struct am65_cpsw_common *common, bool retain_page_pool) @@ -568,6 +620,7 @@ int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) struct page_pool *pool; struct page *page; int port, ret, i; + int port_id; flow = &rx_chn->flows[id]; pp_params.napi = &flow->napi_rx; @@ -587,9 +640,30 @@ int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) /* using same page pool is allowed as no running rx handlers * simultaneously for both ndevs */ + + /* get first port with XSK pool & XDP program set */ + for (port = 0; port < common->port_num; port++) { + if (!common->ports[port].ndev) + continue; + + flow->xsk_pool = am65_cpsw_xsk_get_pool(&common->ports[port], + id); + if (flow->xsk_pool) + break; + } + + port_id = common->ports[port].port_id; + flow->xsk_port_id = flow->xsk_pool ? port_id : -EINVAL; for (port = 0; port < common->port_num; port++) { if (!common->ports[port].ndev) - /* FIXME should we BUG here? 
*/ + continue; + + port_id = common->ports[port].port_id; + + /* NOTE: if queue is XSK then only register it + * for the relevant port it was assigned to + */ + if (flow->xsk_pool && port_id != flow->xsk_port_id) continue; rxq = &common->ports[port].xdp_rxq[id]; @@ -598,29 +672,44 @@ int am65_cpsw_create_rxq(struct am65_cpsw_common *common, int id) if (ret) goto err; - ret = xdp_rxq_info_reg_mem_model(rxq, - MEM_TYPE_PAGE_POOL, - pool); + if (flow->xsk_pool) { + ret = xdp_rxq_info_reg_mem_model(rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + xsk_pool_set_rxq_info(flow->xsk_pool, rxq); + } else { + ret = xdp_rxq_info_reg_mem_model(rxq, + MEM_TYPE_PAGE_POOL, + pool); + } + if (ret) goto err; } - for (i = 0; i < AM65_CPSW_MAX_RX_DESC; i++) { - page = page_pool_dev_alloc_pages(flow->page_pool); - if (!page) { - dev_err(common->dev, "cannot allocate page in flow %d\n", - id); - ret = -ENOMEM; - goto err; - } + if (flow->xsk_pool) { + /* get pages from xsk_pool and push to RX ring + * queue as much as possible + */ + am65_cpsw_nuss_rx_alloc_zc(flow, AM65_CPSW_MAX_RX_DESC); + } else { + for (i = 0; i < AM65_CPSW_MAX_RX_DESC; i++) { + page = page_pool_dev_alloc_pages(flow->page_pool); + if (!page) { + dev_err(common->dev, "cannot allocate page in flow %d\n", + id); + ret = -ENOMEM; + goto err; + } - ret = am65_cpsw_nuss_rx_push(common, page, id); - if (ret < 0) { - dev_err(common->dev, - "cannot submit page to rx channel flow %d, error %d\n", - id, ret); - am65_cpsw_put_page(flow, page, false); - goto err; + ret = am65_cpsw_nuss_rx_push(common, page, id); + if (ret < 0) { + dev_err(common->dev, + "cannot submit page to rx channel flow %d, error %d\n", + id, ret); + am65_cpsw_put_page(flow, page, false); + goto err; + } } } @@ -777,6 +866,8 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) struct am65_cpsw_rx_chn *rx_chn = data; struct cppi5_host_desc_t *desc_rx; struct am65_cpsw_swdata *swdata; + struct am65_cpsw_rx_flow *flow; + struct xdp_buff *xdp; 
dma_addr_t buf_dma; struct page *page; u32 buf_dma_len; @@ -784,13 +875,20 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - page = swdata->page; flow_id = swdata->flow_id; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - am65_cpsw_put_page(&rx_chn->flows[flow_id], page, false); + flow = &rx_chn->flows[flow_id]; + if (flow->xsk_pool) { + xdp = swdata->xdp; + xsk_buff_free(xdp); + } else { + page = swdata->page; + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, + DMA_FROM_DEVICE); + am65_cpsw_put_page(flow, page, false); + } } static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, @@ -1267,6 +1365,151 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info) } } +static struct sk_buff *am65_cpsw_create_skb_zc(struct am65_cpsw_rx_flow *flow, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + struct sk_buff *skb; + + skb = napi_alloc_skb(&flow->napi_rx, + xdp->data_end - xdp->data_hard_start); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); + if (metasize) + skb_metadata_set(skb, metasize); + + return skb; +} + +static void am65_cpsw_dispatch_skb_zc(struct am65_cpsw_rx_flow *flow, + struct am65_cpsw_port *port, + struct xdp_buff *xdp, u32 csum_info) +{ + struct am65_cpsw_common *common = flow->common; + unsigned int len = xdp->data_end - xdp->data; + struct am65_cpsw_ndev_priv *ndev_priv; + struct net_device *ndev = port->ndev; + struct sk_buff *skb; + + skb = am65_cpsw_create_skb_zc(flow, xdp); + if (!skb) { + 
ndev->stats.rx_dropped++; + return; + } + + ndev_priv = netdev_priv(ndev); + am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); + if (port->rx_ts_enabled) + am65_cpts_rx_timestamp(common->cpts, skb); + + skb_mark_for_recycle(skb); + skb->protocol = eth_type_trans(skb, ndev); + am65_cpsw_nuss_rx_csum(skb, csum_info); + napi_gro_receive(&flow->napi_rx, skb); + dev_sw_netstats_rx_add(ndev, len); +} + +static int am65_cpsw_nuss_rx_zc(struct am65_cpsw_rx_flow *flow, int budget) +{ + struct am65_cpsw_rx_chn *rx_chn = &flow->common->rx_chns; + u32 buf_dma_len, pkt_len, port_id = 0, csum_info; + struct am65_cpsw_common *common = flow->common; + struct cppi5_host_desc_t *desc_rx; + struct device *dev = common->dev; + struct am65_cpsw_swdata *swdata; + dma_addr_t desc_dma, buf_dma; + struct am65_cpsw_port *port; + struct net_device *ndev; + u32 flow_idx = flow->id; + struct xdp_buff *xdp; + int count = 0; + int xdp_status = 0; + u32 *psdata; + int ret; + + while (count < budget) { + ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_idx, + &desc_dma); + if (ret) { + if (ret != -ENODATA) + dev_err(dev, "RX: pop chn fail %d\n", + ret); + break; + } + + if (cppi5_desc_is_tdcm(desc_dma)) { + dev_dbg(dev, "%s RX tdown flow: %u\n", + __func__, flow_idx); + if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) + complete(&common->tdown_complete); + continue; + } + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, + desc_dma); + dev_dbg(dev, "%s flow_idx: %u desc %pad\n", + __func__, flow_idx, &desc_dma); + + swdata = cppi5_hdesc_get_swdata(desc_rx); + xdp = swdata->xdp; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + pkt_len = cppi5_hdesc_get_pktlen(desc_rx); + cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); + dev_dbg(dev, "%s rx port_id:%d\n", __func__, port_id); + port = am65_common_get_port(common, port_id); + ndev = port->ndev; + psdata = 
cppi5_hdesc_get_psdata(desc_rx); + csum_info = psdata[2]; + dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + count++; + xsk_buff_set_size(xdp, pkt_len); + xsk_buff_dma_sync_for_cpu(xdp); + /* check if this port has XSK enabled. else drop packet */ + if (port_id != flow->xsk_port_id) { + dev_dbg(dev, "discarding non xsk port data\n"); + xsk_buff_free(xdp); + ndev->stats.rx_dropped++; + continue; + } + + ret = am65_cpsw_run_xdp(flow, port, xdp, &pkt_len); + switch (ret) { + case AM65_CPSW_XDP_PASS: + am65_cpsw_dispatch_skb_zc(flow, port, xdp, csum_info); + xsk_buff_free(xdp); + break; + case AM65_CPSW_XDP_CONSUMED: + xsk_buff_free(xdp); + break; + case AM65_CPSW_XDP_TX: + case AM65_CPSW_XDP_REDIRECT: + xdp_status |= ret; + break; + } + } + + if (xdp_status & AM65_CPSW_XDP_REDIRECT) + xdp_do_flush(); + + ret = am65_cpsw_nuss_rx_alloc_zc(flow, count); + + if (xsk_uses_need_wakeup(flow->xsk_pool)) { + /* We set wakeup if we are exhausted of new requests */ + if (ret < count) + xsk_set_rx_need_wakeup(flow->xsk_pool); + else + xsk_clear_rx_need_wakeup(flow->xsk_pool); + } + + return count; +} + static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, int *xdp_state) { @@ -1392,7 +1635,11 @@ static enum hrtimer_restart am65_cpsw_nuss_rx_timer_callback(struct hrtimer *tim struct am65_cpsw_rx_flow, rx_hrtimer); - enable_irq(flow->irq); + if (flow->irq_disabled) { + flow->irq_disabled = false; + enable_irq(flow->irq); + } + return HRTIMER_NORESTART; } @@ -1406,17 +1653,21 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) int num_rx = 0; /* process only this flow */ - cur_budget = budget; - while (cur_budget--) { - ret = am65_cpsw_nuss_rx_packets(flow, &xdp_state); - xdp_state_or |= xdp_state; - if (ret) - break; - num_rx++; - } + if (flow->xsk_pool) { + num_rx = am65_cpsw_nuss_rx_zc(flow, budget); + } else { + cur_budget = budget; + while (cur_budget--) { + ret = 
am65_cpsw_nuss_rx_packets(flow, &xdp_state); + xdp_state_or |= xdp_state; + if (ret) + break; + num_rx++; + } - if (xdp_state_or & AM65_CPSW_XDP_REDIRECT) - xdp_do_flush(); + if (xdp_state_or & AM65_CPSW_XDP_REDIRECT) + xdp_do_flush(); + } dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 31789b5e5e1fc..2bf4d12f92764 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "am65-cpsw-qos.h" struct am65_cpts; @@ -107,6 +108,8 @@ struct am65_cpsw_rx_flow { struct hrtimer rx_hrtimer; unsigned long rx_pace_timeout; struct page_pool *page_pool; + struct xsk_buff_pool *xsk_pool; + int xsk_port_id; char name[32]; }; @@ -120,7 +123,10 @@ struct am65_cpsw_tx_swdata { struct am65_cpsw_swdata { u32 flow_id; - struct page *page; + union { + struct page *page; + struct xdp_buff *xdp; + }; }; struct am65_cpsw_rx_chn { @@ -248,4 +254,8 @@ static inline bool am65_cpsw_xdp_is_enabled(struct am65_cpsw_port *port) { return !!READ_ONCE(port->xdp_prog); } + +struct xsk_buff_pool *am65_cpsw_xsk_get_pool(struct am65_cpsw_port *port, + u32 qid); + #endif /* AM65_CPSW_NUSS_H_ */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-xdp.c b/drivers/net/ethernet/ti/am65-cpsw-xdp.c index 89f43f7c83db3..0e37c27f77720 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-xdp.c +++ b/drivers/net/ethernet/ti/am65-cpsw-xdp.c @@ -108,6 +108,9 @@ int am65_cpsw_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_rx_flow *rx_flow; + + rx_flow = &common->rx_chns.flows[qid]; if (!netif_running(ndev) || !netif_carrier_ok(ndev)) return -ENETDOWN; @@ -118,5 +121,26 @@ int am65_cpsw_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) if (qid >= 
common->rx_ch_num_flows || qid >= common->tx_ch_num) return -EINVAL; + if (!rx_flow->xsk_pool) + return -EINVAL; + + if (flags & XDP_WAKEUP_RX) { + if (!napi_if_scheduled_mark_missed(&rx_flow->napi_rx)) { + if (likely(napi_schedule_prep(&rx_flow->napi_rx))) + __napi_schedule(&rx_flow->napi_rx); + } + } + return 0; } + +struct xsk_buff_pool *am65_cpsw_xsk_get_pool(struct am65_cpsw_port *port, + u32 qid) +{ + if (!am65_cpsw_xdp_is_enabled(port) || + !test_bit(qid, port->common->xdp_zc_queues) || + port->common->xsk_port_id[qid] != port->port_id) + return NULL; + + return xsk_get_pool_from_qid(port->ndev, qid); +} From b16479e9989f5d80db8c31eb310f75f3aad1b956 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:55 +0200 Subject: [PATCH 766/867] net: ethernet: ti: am65-cpsw: Add AF_XDP zero copy for TX Add zero copy support to TX path. Introduce xsk_pool and xsk_port_id to struct am65_cpsw_tx_chn. This way we can quickly check if the flow is setup as XSK pool and for which port. If the TX channel is setup as XSK pool then get the frames from the pool and send it to the TX channel. 
Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 171 +++++++++++++++++++++-- drivers/net/ethernet/ti/am65-cpsw-nuss.h | 5 + drivers/net/ethernet/ti/am65-cpsw-xdp.c | 11 +- 3 files changed, 171 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index afc0c8836fe24..2e06e7df23ad5 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -758,6 +758,8 @@ void am65_cpsw_destroy_txq(struct am65_cpsw_common *common, int id) k3_udma_glue_reset_tx_chn(tx_chn->tx_chn, tx_chn, am65_cpsw_nuss_tx_cleanup); k3_udma_glue_disable_tx_chn(tx_chn->tx_chn); + tx_chn->xsk_pool = NULL; + tx_chn->xsk_port_id = -EINVAL; } static void am65_cpsw_destroy_txqs(struct am65_cpsw_common *common) @@ -786,12 +788,25 @@ static void am65_cpsw_destroy_txqs(struct am65_cpsw_common *common) int am65_cpsw_create_txq(struct am65_cpsw_common *common, int id) { struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[id]; - int ret; + int port, ret; ret = k3_udma_glue_enable_tx_chn(tx_chn->tx_chn); if (ret) return ret; + /* get first port with XSK pool & XDP program set */ + for (port = 0; port < common->port_num; port++) { + if (!common->ports[port].ndev) + continue; + + tx_chn->xsk_pool = am65_cpsw_xsk_get_pool(&common->ports[port], + id); + if (tx_chn->xsk_pool) + break; + } + + tx_chn->xsk_port_id = tx_chn->xsk_pool ? 
+ common->ports[port].port_id : -EINVAL; napi_enable(&tx_chn->napi_tx); return 0; @@ -892,15 +907,18 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) } static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, - struct cppi5_host_desc_t *desc) + struct cppi5_host_desc_t *desc, + enum am65_cpsw_tx_buf_type buf_type) { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; u32 buf_dma_len; first_desc = desc; - next_desc = first_desc; + if (buf_type == AM65_CPSW_TX_BUF_TYPE_XSK_TX) + goto free_pool; + next_desc = first_desc; cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -923,6 +941,7 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } +free_pool: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } @@ -932,21 +951,32 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) enum am65_cpsw_tx_buf_type buf_type; struct am65_cpsw_tx_swdata *swdata; struct cppi5_host_desc_t *desc_tx; + struct xsk_buff_pool *xsk_pool; struct xdp_frame *xdpf; struct sk_buff *skb; desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); - if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { + switch (buf_type) { + case AM65_CPSW_TX_BUF_TYPE_SKB: skb = swdata->skb; dev_kfree_skb_any(skb); - } else { + break; + case AM65_CPSW_TX_BUF_TYPE_XDP_TX: + case AM65_CPSW_TX_BUF_TYPE_XDP_NDO: xdpf = swdata->xdpf; xdp_return_frame(xdpf); + break; + case AM65_CPSW_TX_BUF_TYPE_XSK_TX: + xsk_pool = swdata->xsk_pool; + xsk_tx_completed(xsk_pool, 1); + break; + default: + break; } - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx, buf_type); } static struct sk_buff *am65_cpsw_build_skb(void *page_addr, @@ -1189,6 +1219,82 @@ static int 
am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) return ret; } +static int am65_cpsw_xsk_xmit_zc(struct net_device *ndev, + struct am65_cpsw_tx_chn *tx_chn) +{ + struct am65_cpsw_common *common = tx_chn->common; + struct xsk_buff_pool *pool = tx_chn->xsk_pool; + struct xdp_desc *xdp_descs = pool->tx_descs; + struct cppi5_host_desc_t *host_desc; + struct am65_cpsw_tx_swdata *swdata; + dma_addr_t dma_desc, dma_buf; + int num_tx = 0, pkt_len; + int descs_avail, ret; + int i; + + descs_avail = k3_cppi_desc_pool_avail(tx_chn->desc_pool); + /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS + * will be available for normal TX path and queue is stopped there if + * necessary + */ + if (descs_avail <= MAX_SKB_FRAGS) + return 0; + + descs_avail -= MAX_SKB_FRAGS; + descs_avail = xsk_tx_peek_release_desc_batch(pool, descs_avail); + + for (i = 0; i < descs_avail; i++) { + host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (unlikely(!host_desc)) + break; + + am65_cpsw_nuss_set_buf_type(tx_chn, host_desc, + AM65_CPSW_TX_BUF_TYPE_XSK_TX); + dma_buf = xsk_buff_raw_get_dma(pool, xdp_descs[i].addr); + pkt_len = xdp_descs[i].len; + xsk_buff_raw_dma_sync_for_device(pool, dma_buf, pkt_len); + + cppi5_hdesc_init(host_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + AM65_CPSW_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(host_desc, AM65_CPSW_CPPI_TX_PKT_TYPE); + cppi5_hdesc_set_pktlen(host_desc, pkt_len); + cppi5_desc_set_pktids(&host_desc->hdr, 0, + AM65_CPSW_CPPI_TX_FLOW_ID); + cppi5_desc_set_tags_ids(&host_desc->hdr, 0, + tx_chn->xsk_port_id); + + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &dma_buf); + cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, + pkt_len); + + swdata = cppi5_hdesc_get_swdata(host_desc); + swdata->ndev = ndev; + swdata->xsk_pool = pool; + + dma_desc = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, + host_desc); + if (AM65_CPSW_IS_CPSW2G(common)) { + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, + host_desc, dma_desc); + 
} else { + spin_lock_bh(&tx_chn->lock); + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, + host_desc, dma_desc); + spin_unlock_bh(&tx_chn->lock); + } + + if (ret) { + ndev->stats.tx_errors++; + k3_cppi_desc_pool_free(tx_chn->desc_pool, host_desc); + break; + } + + num_tx++; + } + + return num_tx; +} + static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, struct am65_cpsw_tx_chn *tx_chn, struct xdp_frame *xdpf, @@ -1716,15 +1822,19 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, struct netdev_queue *netif_txq; unsigned int total_bytes = 0; struct net_device *ndev; + int xsk_frames_done = 0; struct xdp_frame *xdpf; unsigned int pkt_len; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; + int xsk_tx = 0; tx_chn = &common->tx_chns[chn]; while (true) { + pkt_len = 0; + if (!single_port) spin_lock(&tx_chn->lock); res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); @@ -1746,25 +1856,36 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, swdata = cppi5_hdesc_get_swdata(desc_tx); ndev = swdata->ndev; buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); - if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { + switch (buf_type) { + case AM65_CPSW_TX_BUF_TYPE_SKB: skb = swdata->skb; am65_cpts_tx_timestamp(tx_chn->common->cpts, skb); pkt_len = skb->len; napi_consume_skb(skb, budget); - } else { + total_bytes += pkt_len; + break; + case AM65_CPSW_TX_BUF_TYPE_XDP_TX: + case AM65_CPSW_TX_BUF_TYPE_XDP_NDO: xdpf = swdata->xdpf; pkt_len = xdpf->len; + total_bytes += pkt_len; if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) xdp_return_frame_rx_napi(xdpf); else xdp_return_frame(xdpf); + break; + case AM65_CPSW_TX_BUF_TYPE_XSK_TX: + pkt_len = cppi5_hdesc_get_pktlen(desc_tx); + xsk_frames_done++; + break; + default: + break; } - total_bytes += pkt_len; num_tx++; - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx, buf_type); dev_sw_netstats_tx_add(ndev, 1, pkt_len); - if 
(!single_port) { + if (!single_port && buf_type != AM65_CPSW_TX_BUF_TYPE_XSK_TX) { /* as packets from multi ports can be interleaved * on the same channel, we have to figure out the * port/queue at every packet and report it/wake queue. @@ -1781,6 +1902,19 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); } + if (tx_chn->xsk_pool) { + if (xsk_frames_done) + xsk_tx_completed(tx_chn->xsk_pool, xsk_frames_done); + + if (xsk_uses_need_wakeup(tx_chn->xsk_pool)) + xsk_set_tx_need_wakeup(tx_chn->xsk_pool); + + ndev = common->ports[tx_chn->xsk_port_id].ndev; + netif_txq = netdev_get_tx_queue(ndev, chn); + txq_trans_cond_update(netif_txq); + xsk_tx = am65_cpsw_xsk_xmit_zc(ndev, tx_chn); + } + dev_dbg(dev, "%s:%u pkt:%d\n", __func__, chn, num_tx); return num_tx; @@ -1791,7 +1925,11 @@ static enum hrtimer_restart am65_cpsw_nuss_tx_timer_callback(struct hrtimer *tim struct am65_cpsw_tx_chn *tx_chns = container_of(timer, struct am65_cpsw_tx_chn, tx_hrtimer); - enable_irq(tx_chns->irq); + if (tx_chns->irq_disabled) { + tx_chns->irq_disabled = false; + enable_irq(tx_chns->irq); + } + return HRTIMER_NORESTART; } @@ -1811,7 +1949,8 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) hrtimer_start(&tx_chn->tx_hrtimer, ns_to_ktime(tx_chn->tx_pace_timeout), HRTIMER_MODE_REL_PINNED); - } else { + } else if (tx_chn->irq_disabled) { + tx_chn->irq_disabled = false; enable_irq(tx_chn->irq); } } @@ -1834,6 +1973,7 @@ static irqreturn_t am65_cpsw_nuss_tx_irq(int irq, void *dev_id) { struct am65_cpsw_tx_chn *tx_chn = dev_id; + tx_chn->irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&tx_chn->napi_tx); @@ -1998,14 +2138,14 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, return NETDEV_TX_OK; err_free_descs: - am65_cpsw_nuss_xmit_free(tx_chn, first_desc); + am65_cpsw_nuss_xmit_free(tx_chn, first_desc, AM65_CPSW_TX_BUF_TYPE_SKB); err_free_skb: 
ndev->stats.tx_dropped++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; busy_free_descs: - am65_cpsw_nuss_xmit_free(tx_chn, first_desc); + am65_cpsw_nuss_xmit_free(tx_chn, first_desc, AM65_CPSW_TX_BUF_TYPE_SKB); busy_stop_q: netif_tx_stop_queue(netif_txq); return NETDEV_TX_BUSY; @@ -2259,6 +2399,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_xdp_xmit = am65_cpsw_ndo_xdp_xmit, .ndo_hwtstamp_get = am65_cpsw_nuss_hwtstamp_get, .ndo_hwtstamp_set = am65_cpsw_nuss_hwtstamp_set, + .ndo_xsk_wakeup = am65_cpsw_xsk_wakeup, }; static void am65_cpsw_disable_phy(struct phy *phy) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 2bf4d12f92764..ac2d9d32e95b9 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -72,6 +72,7 @@ enum am65_cpsw_tx_buf_type { AM65_CPSW_TX_BUF_TYPE_SKB, AM65_CPSW_TX_BUF_TYPE_XDP_TX, AM65_CPSW_TX_BUF_TYPE_XDP_NDO, + AM65_CPSW_TX_BUF_TYPE_XSK_TX, }; struct am65_cpsw_host { @@ -97,6 +98,9 @@ struct am65_cpsw_tx_chn { unsigned char dsize_log2; char tx_chn_name[128]; u32 rate_mbps; + struct xsk_buff_pool *xsk_pool; + int xsk_port_id; + bool irq_disabled; }; struct am65_cpsw_rx_flow { @@ -118,6 +122,7 @@ struct am65_cpsw_tx_swdata { union { struct sk_buff *skb; struct xdp_frame *xdpf; + struct xsk_buff_pool *xsk_pool; }; }; diff --git a/drivers/net/ethernet/ti/am65-cpsw-xdp.c b/drivers/net/ethernet/ti/am65-cpsw-xdp.c index 0e37c27f77720..9adf13056f70f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-xdp.c +++ b/drivers/net/ethernet/ti/am65-cpsw-xdp.c @@ -109,8 +109,10 @@ int am65_cpsw_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); struct am65_cpsw_rx_flow *rx_flow; + struct am65_cpsw_tx_chn *tx_ch; rx_flow = &common->rx_chns.flows[qid]; + tx_ch = &common->tx_chns[qid]; if (!netif_running(ndev) || 
!netif_carrier_ok(ndev)) return -ENETDOWN; @@ -121,9 +123,16 @@ int am65_cpsw_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) if (qid >= common->rx_ch_num_flows || qid >= common->tx_ch_num) return -EINVAL; - if (!rx_flow->xsk_pool) + if (!rx_flow->xsk_pool && !tx_ch->xsk_pool) return -EINVAL; + if (flags & XDP_WAKEUP_TX) { + if (!napi_if_scheduled_mark_missed(&tx_ch->napi_tx)) { + if (likely(napi_schedule_prep(&tx_ch->napi_tx))) + __napi_schedule(&tx_ch->napi_tx); + } + } + if (flags & XDP_WAKEUP_RX) { if (!napi_if_scheduled_mark_missed(&rx_flow->napi_rx)) { if (likely(napi_schedule_prep(&rx_flow->napi_rx))) From dbf16831117153a889fd28a4595c919c0274ae92 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:56 +0200 Subject: [PATCH 767/867] net: ethernet: ti: am65-cpsw: enable zero copy in XDP features Now that we have the plumbing in for XDP zero copy RX and TX, enable the zero copy feature flag. Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2e06e7df23ad5..9d1048eea7e47 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -3210,7 +3210,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) NETIF_F_HW_VLAN_CTAG_FILTER; port->ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; port->ndev->vlan_features |= NETIF_F_SG; port->ndev->netdev_ops = &am65_cpsw_nuss_netdev_ops; port->ndev->ethtool_ops = &am65_cpsw_ethtool_ops_slave; From 596dc0b698b13270e5a6f129dbdba3eec2338dfd Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 9 Nov 2025 23:37:57 +0200 Subject: [PATCH 768/867] net: ethernet: ti: am65-cpsw: Fix clearing of irq_disabled flag in rx_poll In 
am65_cpsw_nuss_rx_poll() there is a possibility that irq_disabled flag is cleared but the IRQ is not enabled. This patch fixes that by clearing irq_disabled flag right when enabling the irq. Fixes: da70d184a8c3 ("net: ethernet: ti: am65-cpsw: Introduce multi queue Rx") Signed-off-by: Roger Quadros Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9d1048eea7e47..c0f891a91d747 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1778,15 +1778,13 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { - if (flow->irq_disabled) { + if (unlikely(flow->rx_pace_timeout)) { + hrtimer_start(&flow->rx_hrtimer, + ns_to_ktime(flow->rx_pace_timeout), + HRTIMER_MODE_REL_PINNED); + } else if (flow->irq_disabled) { flow->irq_disabled = false; - if (unlikely(flow->rx_pace_timeout)) { - hrtimer_start(&flow->rx_hrtimer, - ns_to_ktime(flow->rx_pace_timeout), - HRTIMER_MODE_REL_PINNED); - } else { - enable_irq(flow->irq); - } + enable_irq(flow->irq); } } From fc4b3918fb73db19676586303ac0fe2fdadd724f Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 10 Nov 2025 17:13:37 +0800 Subject: [PATCH 769/867] net: bonding: use atomic instead of rtnl_mutex, to make sure peer notify updated Using atomic to protect the send_peer_notif instead of rtnl_mutex. This approach allows safe updates in both interrupt and process contexts, while avoiding code complexity. In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif() from acquiring the lock and updating send_peer_notif. This patch addresses the issue by using an atomic. 
Since updating send_peer_notif does not require high real-time performance, such atomic updates are acceptable. By the way, send_peer_notif is reset atomically. For avoiding peer notify event loss, we should invoke bond_should_notify_peers() to check whether to send peer notify in bond_mii_monitor() and bond_activebackup_arp_mon(). More importantly, send_peer_notif-- should be placed with send_peer_notif *if* block [2]. Otherwise [1], if atomic_dec is executed immediately after resetting the send_peer_notif, it is very likely that the event will be lost. - [1]: if (send_peer_notif) { ... } // if reset and then atomic_dec, event will be lost. atomic_dec() - [2] should be changed: if (send_peer_notif) { ... // if reset and then atomic_dec, event may be lost. // but we are already in notify context. atomic_dec() } Cc: Jay Vosburgh Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Nikolay Aleksandrov Cc: Hangbin Liu Suggested-by: Jay Vosburgh Signed-off-by: Tonghao Zhang Signed-off-by: NipaLocal --- drivers/net/bonding/bond_3ad.c | 7 +--- drivers/net/bonding/bond_main.c | 71 +++++++++++++++++---------------- include/net/bonding.h | 9 ++++- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 49717b7b82a2c..05c573e454505 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port) { struct bonding *bond = port->slave->bond; - if (bond->params.broadcast_neighbor && rtnl_trylock()) { - bond->send_peer_notif = bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - rtnl_unlock(); - } + if (bond->params.broadcast_neighbor) + bond_peer_notify_reset(bond); } /** diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3d56339a8a10d..98a7fa737ff13 100644 --- 
a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond) { struct bond_up_slave *usable; struct slave *slave = NULL; + int send_peer_notif; - if (!bond->send_peer_notif || - bond->send_peer_notif % - max(1, bond->params.peer_notif_delay) != 0 || + send_peer_notif = atomic_read(&bond->send_peer_notif); + if (!send_peer_notif || + send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || !netif_carrier_ok(bond->dev)) return false; @@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) BOND_SLAVE_NOTIFY_NOW); if (new_active) { - bool should_notify_peers = false; - bond_set_slave_active_flags(new_active, BOND_SLAVE_NOTIFY_NOW); @@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) old_active); if (netif_running(bond->dev)) { - bond->send_peer_notif = - bond->params.num_peer_notif * - max(1, bond->params.peer_notif_delay); - should_notify_peers = - bond_should_notify_peers(bond); + bond_peer_notify_reset(bond); + + if (bond_should_notify_peers(bond)) { + atomic_dec(&bond->send_peer_notif); + call_netdevice_notifiers( + NETDEV_NOTIFY_PEERS, + bond->dev); + } } call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); - if (should_notify_peers) { - bond->send_peer_notif--; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); - } } } @@ -2784,11 +2781,11 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); - bool should_notify_peers; - bool commit; - unsigned long delay; - struct slave *slave; struct list_head *iter; + struct slave *slave; + unsigned long delay; + int send_peer_notif; + bool commit; delay = msecs_to_jiffies(bond->params.miimon); @@ -2797,12 +2794,12 @@ static void bond_mii_monitor(struct work_struct *work) rcu_read_lock(); - should_notify_peers = 
bond_should_notify_peers(bond); + send_peer_notif = atomic_read(&bond->send_peer_notif); commit = !!bond_miimon_inspect(bond); rcu_read_unlock(); - if (commit || bond->send_peer_notif) { + if (commit || send_peer_notif) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; @@ -2817,11 +2814,13 @@ static void bond_mii_monitor(struct work_struct *work) bond_miimon_commit(bond); } - if (bond->send_peer_notif) { - bond->send_peer_notif--; - if (should_notify_peers) + if (send_peer_notif) { + if (bond_should_notify_peers(bond)) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + + /* It's safe even when send_peer_notif was reset. */ + atomic_dec_if_positive(&bond->send_peer_notif); } rtnl_unlock(); /* might sleep, hold no other locks */ @@ -3733,8 +3732,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) static void bond_activebackup_arp_mon(struct bonding *bond) { - bool should_notify_peers = false; bool should_notify_rtnl = false; + int send_peer_notif = 0; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -3744,15 +3743,12 @@ static void bond_activebackup_arp_mon(struct bonding *bond) rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); - if (bond_ab_arp_inspect(bond)) { rcu_read_unlock(); /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; - should_notify_peers = false; goto re_arm; } @@ -3763,21 +3759,26 @@ static void bond_activebackup_arp_mon(struct bonding *bond) } should_notify_rtnl = bond_ab_arp_probe(bond); + send_peer_notif = atomic_read(&bond->send_peer_notif); + rcu_read_unlock(); re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - if (should_notify_peers || should_notify_rtnl) { + if (send_peer_notif || should_notify_rtnl) { if (!rtnl_trylock()) return; - if (should_notify_peers) { - bond->send_peer_notif--; - 
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, - bond->dev); + if (send_peer_notif) { + if (bond_should_notify_peers(bond)) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); + /* It's safe even when send_peer_notif was reset. */ + atomic_dec_if_positive(&bond->send_peer_notif); } + if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); @@ -4268,6 +4269,8 @@ static int bond_open(struct net_device *bond_dev) queue_delayed_work(bond->wq, &bond->alb_work, 0); } + atomic_set(&bond->send_peer_notif, 0); + if (bond->params.miimon) /* link check interval, in milliseconds. */ queue_delayed_work(bond->wq, &bond->mii_work, 0); @@ -4301,7 +4304,7 @@ static int bond_close(struct net_device *bond_dev) struct slave *slave; bond_work_cancel_all(bond); - bond->send_peer_notif = 0; + atomic_set(&bond->send_peer_notif, 0); if (bond_is_lb(bond)) bond_alb_deinitialize(bond); bond->recv_probe = NULL; diff --git a/include/net/bonding.h b/include/net/bonding.h index 49edc7da05867..afdfcb5bfaf03 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -236,7 +236,7 @@ struct bonding { */ spinlock_t mode_lock; spinlock_t stats_lock; - u32 send_peer_notif; + atomic_t send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; @@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s return NET_XMIT_DROP; } +static inline void bond_peer_notify_reset(struct bonding *bond) +{ + atomic_set(&bond->send_peer_notif, + bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay)); +} + #endif /* _NET_BONDING_H */ From 0a7487e7e2963f0d2a144e4bc6d08104a9185961 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 10 Nov 2025 19:46:44 +0800 Subject: [PATCH 770/867] eea: introduce PCI framework Add basic driver framework for the Alibaba Elastic Ethernet Adapter(EEA). This commit implements the EEA PCI probe functionality. 
Reviewed-by: Dust Li Reviewed-by: Philo Lu Signed-off-by: Wen Gu Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- MAINTAINERS | 8 + drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/alibaba/Kconfig | 29 ++ drivers/net/ethernet/alibaba/Makefile | 5 + drivers/net/ethernet/alibaba/eea/Makefile | 3 + drivers/net/ethernet/alibaba/eea/eea_pci.c | 385 +++++++++++++++++++++ drivers/net/ethernet/alibaba/eea/eea_pci.h | 50 +++ 8 files changed, 482 insertions(+) create mode 100644 drivers/net/ethernet/alibaba/Kconfig create mode 100644 drivers/net/ethernet/alibaba/Makefile create mode 100644 drivers/net/ethernet/alibaba/eea/Makefile create mode 100644 drivers/net/ethernet/alibaba/eea/eea_pci.c create mode 100644 drivers/net/ethernet/alibaba/eea/eea_pci.h diff --git a/MAINTAINERS b/MAINTAINERS index 0dc4aa37d9034..5437935120e88 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -795,6 +795,14 @@ S: Maintained F: Documentation/i2c/busses/i2c-ali1563.rst F: drivers/i2c/busses/i2c-ali1563.c +ALIBABA ELASTIC ETHERNET ADAPTER DRIVER +M: Xuan Zhuo +M: Wen Gu +R: Philo Lu +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/alibaba/eea + ALIBABA ELASTIC RDMA DRIVER M: Cheng Xu M: Kai Shen diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 4a1b368ca7e61..429310dfc686f 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -22,6 +22,7 @@ source "drivers/net/ethernet/aeroflex/Kconfig" source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/airoha/Kconfig" source "drivers/net/ethernet/alacritech/Kconfig" +source "drivers/net/ethernet/alibaba/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 2e18df8ca8ec5..2347f87a5808e 100644 --- a/drivers/net/ethernet/Makefile +++ 
b/drivers/net/ethernet/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_NET_VENDOR_ADI) += adi/ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ obj-$(CONFIG_NET_VENDOR_AIROHA) += airoha/ obj-$(CONFIG_NET_VENDOR_ALACRITECH) += alacritech/ +obj-$(CONFIG_NET_VENDOR_ALIBABA) += alibaba/ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ diff --git a/drivers/net/ethernet/alibaba/Kconfig b/drivers/net/ethernet/alibaba/Kconfig new file mode 100644 index 0000000000000..820a9a7aa1f18 --- /dev/null +++ b/drivers/net/ethernet/alibaba/Kconfig @@ -0,0 +1,29 @@ +# +# Alibaba network device configuration +# + +config NET_VENDOR_ALIBABA + bool "Alibaba Devices" + default y + help + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Alibaba devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_ALIBABA + +config EEA + tristate "Alibaba Elastic Ethernet Adapter support" + depends on PCI_MSI + depends on 64BIT + select PAGE_POOL + default m + help + This driver supports the Alibaba Elastic Ethernet Adapter. + + To compile this driver as a module, choose M here. + +endif #NET_VENDOR_ALIBABA diff --git a/drivers/net/ethernet/alibaba/Makefile b/drivers/net/ethernet/alibaba/Makefile new file mode 100644 index 0000000000000..7980525cb0860 --- /dev/null +++ b/drivers/net/ethernet/alibaba/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Alibaba network device drivers. 
+# + +obj-$(CONFIG_EEA) += eea/ diff --git a/drivers/net/ethernet/alibaba/eea/Makefile b/drivers/net/ethernet/alibaba/eea/Makefile new file mode 100644 index 0000000000000..cf2acf1733fde --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/Makefile @@ -0,0 +1,3 @@ + +obj-$(CONFIG_EEA) += eea.o +eea-y := eea_pci.o diff --git a/drivers/net/ethernet/alibaba/eea/eea_pci.c b/drivers/net/ethernet/alibaba/eea/eea_pci.c new file mode 100644 index 0000000000000..5785d9b3fd309 --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_pci.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. + */ + +#include +#include + +#include "eea_pci.h" + +#define EEA_PCI_DB_OFFSET 4096 + +struct eea_pci_cfg { + __le32 reserve0; + __le32 reserve1; + __le32 drv_f_idx; + __le32 drv_f; + +#define EEA_S_OK BIT(2) +#define EEA_S_FEATURE_DONE BIT(3) +#define EEA_S_FAILED BIT(7) + u8 device_status; + u8 reserved[7]; + + __le32 rx_num_max; + __le32 tx_num_max; + __le32 db_blk_size; + + /* admin queue cfg */ + __le16 aq_size; + __le16 aq_msix_vector; + __le32 aq_db_off; + + __le32 aq_sq_addr; + __le32 aq_sq_addr_hi; + __le32 aq_cq_addr; + __le32 aq_cq_addr_hi; + + __le64 hw_ts; +}; + +struct eea_pci_device { + struct eea_device edev; + struct pci_dev *pci_dev; + + u32 msix_vec_n; + + void __iomem *reg; + void __iomem *db_base; + + char ha_irq_name[32]; + u8 reset_pos; +}; + +#define cfg_pointer(reg, item) \ + ((void __iomem *)((reg) + offsetof(struct eea_pci_cfg, item))) + +#define cfg_write8(reg, item, val) iowrite8(val, cfg_pointer(reg, item)) +#define cfg_write32(reg, item, val) iowrite32(val, cfg_pointer(reg, item)) + +#define cfg_read8(reg, item) ioread8(cfg_pointer(reg, item)) +#define cfg_read32(reg, item) ioread32(cfg_pointer(reg, item)) +#define cfg_readq(reg, item) readq(cfg_pointer(reg, item)) + +const char *eea_pci_name(struct eea_device *edev) +{ + return 
pci_name(edev->ep_dev->pci_dev); +} + +int eea_pci_domain_nr(struct eea_device *edev) +{ + return pci_domain_nr(edev->ep_dev->pci_dev->bus); +} + +u16 eea_pci_dev_id(struct eea_device *edev) +{ + return pci_dev_id(edev->ep_dev->pci_dev); +} + +static void eea_pci_io_set_status(struct eea_device *edev, u8 status) +{ + struct eea_pci_device *ep_dev = edev->ep_dev; + + cfg_write8(ep_dev->reg, device_status, status); +} + +static u8 eea_pci_io_get_status(struct eea_device *edev) +{ + struct eea_pci_device *ep_dev = edev->ep_dev; + + return cfg_read8(ep_dev->reg, device_status); +} + +static void eea_add_status(struct eea_device *dev, u32 status) +{ + eea_pci_io_set_status(dev, eea_pci_io_get_status(dev) | status); +} + +#define EEA_RESET_TIMEOUT_US (1000 * 1000 * 1000) + +int eea_device_reset(struct eea_device *edev) +{ + struct eea_pci_device *ep_dev = edev->ep_dev; + int i, err; + u8 val; + + eea_pci_io_set_status(edev, 0); + + err = read_poll_timeout(cfg_read8, val, !val, 20, EEA_RESET_TIMEOUT_US, + false, ep_dev->reg, device_status); + if (err) + return -EBUSY; + + for (i = 0; i < ep_dev->msix_vec_n; ++i) + synchronize_irq(pci_irq_vector(ep_dev->pci_dev, i)); + + return 0; +} + +void eea_device_ready(struct eea_device *dev) +{ + u8 status = eea_pci_io_get_status(dev); + + WARN_ON(status & EEA_S_OK); + + eea_pci_io_set_status(dev, status | EEA_S_OK); +} + +static int eea_negotiate(struct eea_device *edev) +{ + struct eea_pci_device *ep_dev; + u32 status; + + ep_dev = edev->ep_dev; + + edev->features = 0; + + cfg_write32(ep_dev->reg, drv_f_idx, 0); + cfg_write32(ep_dev->reg, drv_f, (u32)edev->features); + cfg_write32(ep_dev->reg, drv_f_idx, 1); + cfg_write32(ep_dev->reg, drv_f, edev->features >> 32); + + eea_add_status(edev, EEA_S_FEATURE_DONE); + status = eea_pci_io_get_status(edev); + if (!(status & EEA_S_FEATURE_DONE)) + return -ENODEV; + + return 0; +} + +static void eea_pci_release_resource(struct eea_pci_device *ep_dev) +{ + struct pci_dev *pci_dev = 
ep_dev->pci_dev; + + if (ep_dev->reg) { + pci_iounmap(pci_dev, ep_dev->reg); + ep_dev->reg = NULL; + } + + if (ep_dev->msix_vec_n) { + ep_dev->msix_vec_n = 0; + pci_free_irq_vectors(ep_dev->pci_dev); + } + + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); +} + +static int eea_pci_setup(struct pci_dev *pci_dev, struct eea_pci_device *ep_dev) +{ + int err, n; + + ep_dev->pci_dev = pci_dev; + + err = pci_enable_device(pci_dev); + if (err) + return err; + + err = pci_request_regions(pci_dev, "EEA"); + if (err) + goto err_disable_dev; + + pci_set_master(pci_dev); + + err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pci_dev->dev, "Failed to enable 64-bit DMA.\n"); + goto err_release_regions; + } + + ep_dev->reg = pci_iomap(pci_dev, 0, 0); + if (!ep_dev->reg) { + dev_err(&pci_dev->dev, "Failed to map pci bar!\n"); + err = -ENOMEM; + goto err_release_regions; + } + + ep_dev->edev.rx_num = cfg_read32(ep_dev->reg, rx_num_max); + ep_dev->edev.tx_num = cfg_read32(ep_dev->reg, tx_num_max); + + /* 2: adminq, error handle*/ + n = ep_dev->edev.rx_num + ep_dev->edev.tx_num + 2; + err = pci_alloc_irq_vectors(ep_dev->pci_dev, n, n, PCI_IRQ_MSIX); + if (err < 0) + goto err_unmap_reg; + + ep_dev->msix_vec_n = n; + + ep_dev->db_base = ep_dev->reg + EEA_PCI_DB_OFFSET; + ep_dev->edev.db_blk_size = cfg_read32(ep_dev->reg, db_blk_size); + + return 0; + +err_unmap_reg: + pci_iounmap(pci_dev, ep_dev->reg); + ep_dev->reg = NULL; + +err_release_regions: + pci_release_regions(pci_dev); + +err_disable_dev: + pci_disable_device(pci_dev); + + return err; +} + +void __iomem *eea_pci_db_addr(struct eea_device *edev, u32 off) +{ + return edev->ep_dev->db_base + off; +} + +u64 eea_pci_device_ts(struct eea_device *edev) +{ + struct eea_pci_device *ep_dev = edev->ep_dev; + + return cfg_readq(ep_dev->reg, hw_ts); +} + +static int eea_init_device(struct eea_device *edev) +{ + int err; + + err = eea_device_reset(edev); + if (err) + return err; + + 
eea_pci_io_set_status(edev, BIT(0) | BIT(1)); + + err = eea_negotiate(edev); + if (err) + goto err; + + /* do net device probe ... */ + + return 0; +err: + eea_add_status(edev, EEA_S_FAILED); + return err; +} + +static int __eea_pci_probe(struct pci_dev *pci_dev, + struct eea_pci_device *ep_dev) +{ + int err; + + pci_set_drvdata(pci_dev, ep_dev); + + err = eea_pci_setup(pci_dev, ep_dev); + if (err) + return err; + + err = eea_init_device(&ep_dev->edev); + if (err) + goto err_pci_rel; + + return 0; + +err_pci_rel: + eea_pci_release_resource(ep_dev); + return err; +} + +static void __eea_pci_remove(struct pci_dev *pci_dev, bool flush_ha_work) +{ + struct eea_pci_device *ep_dev = pci_get_drvdata(pci_dev); + struct device *dev = get_device(&ep_dev->pci_dev->dev); + + pci_disable_sriov(pci_dev); + + eea_pci_release_resource(ep_dev); + + put_device(dev); +} + +static int eea_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + struct eea_pci_device *ep_dev; + struct eea_device *edev; + int err; + + ep_dev = kzalloc(sizeof(*ep_dev), GFP_KERNEL); + if (!ep_dev) + return -ENOMEM; + + edev = &ep_dev->edev; + + edev->ep_dev = ep_dev; + edev->dma_dev = &pci_dev->dev; + + ep_dev->pci_dev = pci_dev; + + err = __eea_pci_probe(pci_dev, ep_dev); + if (err) + kfree(ep_dev); + + return err; +} + +static void eea_pci_remove(struct pci_dev *pci_dev) +{ + struct eea_pci_device *ep_dev = pci_get_drvdata(pci_dev); + + __eea_pci_remove(pci_dev, true); + + kfree(ep_dev); +} + +static int eea_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs) +{ + struct eea_pci_device *ep_dev = pci_get_drvdata(pci_dev); + struct eea_device *edev = &ep_dev->edev; + int ret; + + if (!(eea_pci_io_get_status(edev) & EEA_S_OK)) + return -EBUSY; + + if (pci_vfs_assigned(pci_dev)) + return -EPERM; + + if (num_vfs == 0) { + pci_disable_sriov(pci_dev); + return 0; + } + + ret = pci_enable_sriov(pci_dev, num_vfs); + if (ret < 0) + return ret; + + return num_vfs; +} + +static const struct 
pci_device_id eea_pci_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x500B) }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, eea_pci_id_table); + +static struct pci_driver eea_pci_driver = { + .name = "eea", + .id_table = eea_pci_id_table, + .probe = eea_pci_probe, + .remove = eea_pci_remove, + .sriov_configure = eea_pci_sriov_configure, +}; + +static __init int eea_pci_init(void) +{ + return pci_register_driver(&eea_pci_driver); +} + +static __exit void eea_pci_exit(void) +{ + pci_unregister_driver(&eea_pci_driver); +} + +module_init(eea_pci_init); +module_exit(eea_pci_exit); + +MODULE_DESCRIPTION("Driver for Alibaba Elastic Ethernet Adapter"); +MODULE_AUTHOR("Xuan Zhuo "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/alibaba/eea/eea_pci.h b/drivers/net/ethernet/alibaba/eea/eea_pci.h new file mode 100644 index 0000000000000..126704a207d53 --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_pci.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#ifndef __EEA_PCI_H__ +#define __EEA_PCI_H__ + +#include + +struct eea_pci_cap { + __u8 cap_vndr; + __u8 cap_next; + __u8 cap_len; + __u8 cfg_type; +}; + +struct eea_pci_reset_reg { + struct eea_pci_cap cap; + __le16 driver; + __le16 device; +}; + +struct eea_pci_device; + +struct eea_device { + struct eea_pci_device *ep_dev; + struct device *dma_dev; + struct eea_net *enet; + + u64 features; + + u32 rx_num; + u32 tx_num; + u32 db_blk_size; +}; + +const char *eea_pci_name(struct eea_device *edev); +int eea_pci_domain_nr(struct eea_device *edev); +u16 eea_pci_dev_id(struct eea_device *edev); + +int eea_device_reset(struct eea_device *dev); +void eea_device_ready(struct eea_device *dev); + +u64 eea_pci_device_ts(struct eea_device *edev); + +void __iomem *eea_pci_db_addr(struct eea_device *edev, u32 off); +#endif From ef0a8e6277b6e8df18d09177cb3c894a65a1997a Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 10 Nov 2025 19:46:45 +0800 Subject: [PATCH 771/867] eea: introduce ring and descriptor structures Add basic driver framework for the Alibaba Elastic Ethernet Adapter(EEA). This commit introduces the ring and descriptor implementations. These structures and ring APIs are used by the RX, TX, and admin queues. 
Reviewed-by: Dust Li Reviewed-by: Philo Lu Signed-off-by: Wen Gu Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- drivers/net/ethernet/alibaba/eea/Makefile | 3 +- drivers/net/ethernet/alibaba/eea/eea_desc.h | 156 ++++++++++++ drivers/net/ethernet/alibaba/eea/eea_ring.c | 260 ++++++++++++++++++++ drivers/net/ethernet/alibaba/eea/eea_ring.h | 91 +++++++ 4 files changed, 509 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/alibaba/eea/eea_desc.h create mode 100644 drivers/net/ethernet/alibaba/eea/eea_ring.c create mode 100644 drivers/net/ethernet/alibaba/eea/eea_ring.h diff --git a/drivers/net/ethernet/alibaba/eea/Makefile b/drivers/net/ethernet/alibaba/eea/Makefile index cf2acf1733fde..e5e4007810a65 100644 --- a/drivers/net/ethernet/alibaba/eea/Makefile +++ b/drivers/net/ethernet/alibaba/eea/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_EEA) += eea.o -eea-y := eea_pci.o +eea-y := eea_ring.o \ + eea_pci.o diff --git a/drivers/net/ethernet/alibaba/eea/eea_desc.h b/drivers/net/ethernet/alibaba/eea/eea_desc.h new file mode 100644 index 0000000000000..541346a033754 --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_desc.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#ifndef __EEA_DESC_H__ +#define __EEA_DESC_H__ + +#define EEA_DESC_TS_MASK GENMASK(47, 0) +#define EEA_DESC_TS(desc) (le64_to_cpu((desc)->ts) & EEA_DESC_TS_MASK) + +struct eea_aq_desc { + __le16 flags; + __le16 id; + __le16 reserved; + u8 classid; + u8 command; + __le64 data_addr; + __le64 reply_addr; + __le32 data_len; + __le32 reply_len; +}; + +struct eea_aq_cdesc { + __le16 flags; + __le16 id; +#define EEA_OK 0 +#define EEA_ERR 0xffffffff + __le32 status; + __le32 reply_len; + __le32 reserved1; + + __le64 reserved2; + __le64 reserved3; +}; + +struct eea_rx_desc { + __le16 flags; + __le16 id; + __le16 len; + __le16 reserved1; + + __le64 addr; + + __le64 hdr_addr; + __le32 reserved2; + __le32 reserved3; +}; + +#define EEA_RX_CDESC_HDR_LEN_MASK GENMASK(9, 0) + +struct eea_rx_cdesc { +#define EEA_DESC_F_DATA_VALID BIT(6) +#define EEA_DESC_F_SPLIT_HDR BIT(5) + __le16 flags; + __le16 id; + __le16 len; +#define EEA_NET_PT_NONE 0 +#define EEA_NET_PT_IPv4 1 +#define EEA_NET_PT_TCPv4 2 +#define EEA_NET_PT_UDPv4 3 +#define EEA_NET_PT_IPv6 4 +#define EEA_NET_PT_TCPv6 5 +#define EEA_NET_PT_UDPv6 6 +#define EEA_NET_PT_IPv6_EX 7 +#define EEA_NET_PT_TCPv6_EX 8 +#define EEA_NET_PT_UDPv6_EX 9 + /* [9:0] is packet type. 
*/ + __le16 type; + + /* hw timestamp [0:47]: ts */ + __le64 ts; + + __le32 hash; + + /* 0-9: hdr_len split header + * 10-15: reserved1 + */ + __le16 len_ex; + __le16 reserved2; + + __le32 reserved3; + __le32 reserved4; +}; + +#define EEA_TX_GSO_NONE 0 +#define EEA_TX_GSO_TCPV4 1 +#define EEA_TX_GSO_TCPV6 4 +#define EEA_TX_GSO_UDP_L4 5 +#define EEA_TX_GSO_ECN 0x80 + +struct eea_tx_desc { +#define EEA_DESC_F_DO_CSUM BIT(6) + __le16 flags; + __le16 id; + __le16 len; + __le16 reserved1; + + __le64 addr; + + __le16 csum_start; + __le16 csum_offset; + u8 gso_type; + u8 reserved2; + __le16 gso_size; + __le64 reserved3; +}; + +struct eea_tx_cdesc { + __le16 flags; + __le16 id; + __le16 len; + __le16 reserved1; + + /* hw timestamp [0:47]: ts */ + __le64 ts; + __le64 reserved2; + __le64 reserved3; +}; + +struct eea_db { +#define EEA_IDX_PRESENT BIT(0) +#define EEA_IRQ_MASK BIT(1) +#define EEA_IRQ_UNMASK BIT(2) +#define EEA_DIRECT_INLINE BIT(3) +#define EEA_DIRECT_DESC BIT(4) + u8 kick_flags; + u8 reserved; + __le16 idx; + + __le16 tx_cq_head; + __le16 rx_cq_head; +}; + +struct eea_db_direct { + u8 kick_flags; + u8 reserved; + __le16 idx; + + __le16 tx_cq_head; + __le16 rx_cq_head; + + u8 desc[24]; +}; + +static_assert(sizeof(struct eea_rx_desc) == 32, "rx desc size does not match"); +static_assert(sizeof(struct eea_rx_cdesc) == 32, + "rx cdesc size does not match"); +static_assert(sizeof(struct eea_tx_desc) == 32, "tx desc size does not match"); +static_assert(sizeof(struct eea_tx_cdesc) == 32, + "tx cdesc size does not match"); +static_assert(sizeof(struct eea_db_direct) == 32, + "db direct size does not match"); +#endif diff --git a/drivers/net/ethernet/alibaba/eea/eea_ring.c b/drivers/net/ethernet/alibaba/eea/eea_ring.c new file mode 100644 index 0000000000000..f05745474ecfb --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_ring.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. 
+ * + * Copyright (C) 2025 Alibaba Inc. + */ + +#include "eea_pci.h" +#include "eea_ring.h" + +void ering_irq_unactive(struct eea_ring *ering) +{ + union { + u64 data; + struct eea_db db; + } val = { .data = 0 }; + + if (ering->mask == EEA_IRQ_MASK) + return; + + ering->mask = EEA_IRQ_MASK; + + val.db.kick_flags = EEA_IRQ_MASK; + + writeq(val.data, (void __iomem *)ering->db); +} + +void ering_irq_active(struct eea_ring *ering, struct eea_ring *tx_ering) +{ + union { + u64 data; + struct eea_db db; + } val = { .data = 0 }; + + if (ering->mask == EEA_IRQ_UNMASK) + return; + + ering->mask = EEA_IRQ_UNMASK; + + val.db.kick_flags = EEA_IRQ_UNMASK; + + val.db.tx_cq_head = cpu_to_le16(tx_ering->cq.hw_idx); + val.db.rx_cq_head = cpu_to_le16(ering->cq.hw_idx); + + writeq(val.data, ering->db); +} + +void *ering_cq_get_desc(const struct eea_ring *ering) +{ + u8 phase; + u8 *desc; + + desc = ering->cq.desc + (ering->cq.head << ering->cq.desc_size_shift); + + phase = *(u8 *)(desc + ering->cq.desc_size - 1); + + if ((phase & EEA_RING_DESC_F_CQ_PHASE) == ering->cq.phase) { + dma_rmb(); + return desc; + } + + return NULL; +} + +/* sq api */ +void *ering_sq_alloc_desc(struct eea_ring *ering, u16 id, bool is_last, + u16 flags) +{ + struct eea_ring_sq *sq = &ering->sq; + struct eea_common_desc *desc; + + if (!sq->shadow_num) { + sq->shadow_idx = sq->head; + sq->shadow_id = cpu_to_le16(id); + } + + if (!is_last) + flags |= EEA_RING_DESC_F_MORE; + + desc = sq->desc + (sq->shadow_idx << sq->desc_size_shift); + + desc->flags = cpu_to_le16(flags); + desc->id = sq->shadow_id; + + if (unlikely(++sq->shadow_idx >= ering->num)) + sq->shadow_idx = 0; + + ++sq->shadow_num; + + return desc; +} + +/* alloc desc for adminq */ +void *ering_aq_alloc_desc(struct eea_ring *ering) +{ + struct eea_ring_sq *sq = &ering->sq; + struct eea_common_desc *desc; + + sq->shadow_idx = sq->head; + + desc = sq->desc + (sq->shadow_idx << sq->desc_size_shift); + + if (unlikely(++sq->shadow_idx >= ering->num)) + sq->shadow_idx = 0; + + 
++sq->shadow_num; + + return desc; +} + +void ering_sq_commit_desc(struct eea_ring *ering) +{ + struct eea_ring_sq *sq = &ering->sq; + int num; + + num = sq->shadow_num; + + ering->num_free -= num; + + sq->head = sq->shadow_idx; + sq->hw_idx += num; + sq->shadow_num = 0; +} + +void ering_sq_cancel(struct eea_ring *ering) +{ + ering->sq.shadow_num = 0; +} + +/* cq api */ +void ering_cq_ack_desc(struct eea_ring *ering, u32 num) +{ + struct eea_ring_cq *cq = &ering->cq; + + cq->head += num; + cq->hw_idx += num; + + if (unlikely(cq->head >= ering->num)) { + cq->head -= ering->num; + cq->phase ^= EEA_RING_DESC_F_CQ_PHASE; + } + + ering->num_free += num; +} + +/* notify */ +bool ering_kick(struct eea_ring *ering) +{ + union { + struct eea_db db; + u64 data; + } val = { .data = 0 }; + + val.db.kick_flags = EEA_IDX_PRESENT; + val.db.idx = cpu_to_le16(ering->sq.hw_idx); + + writeq(val.data, ering->db); + + return true; +} + +/* ering alloc/free */ +static void ering_free_queue(struct eea_device *edev, size_t size, + void *queue, dma_addr_t dma_handle) +{ + dma_free_coherent(edev->dma_dev, size, queue, dma_handle); +} + +static void *ering_alloc_queue(struct eea_device *edev, size_t size, + dma_addr_t *dma_handle) +{ + gfp_t flags = GFP_KERNEL | __GFP_NOWARN; + + return dma_alloc_coherent(edev->dma_dev, size, dma_handle, flags); +} + +static int ering_alloc_queues(struct eea_ring *ering, struct eea_device *edev, + u32 num, u8 sq_desc_size, u8 cq_desc_size) +{ + dma_addr_t addr; + size_t size; + void *ring; + + size = num * sq_desc_size; + + ring = ering_alloc_queue(edev, size, &addr); + if (!ring) + return -ENOMEM; + + ering->sq.desc = ring; + ering->sq.dma_addr = addr; + ering->sq.dma_size = size; + ering->sq.desc_size = sq_desc_size; + ering->sq.desc_size_shift = fls(sq_desc_size) - 1; + + size = num * cq_desc_size; + + ring = ering_alloc_queue(edev, size, &addr); + if (!ring) + goto err_free_sq; + + ering->cq.desc = ring; + ering->cq.dma_addr = addr; + ering->cq.dma_size = size; + 
ering->cq.desc_size = cq_desc_size; + ering->cq.desc_size_shift = fls(cq_desc_size) - 1; + + ering->num = num; + + return 0; + +err_free_sq: + ering_free_queue(ering->edev, ering->sq.dma_size, + ering->sq.desc, ering->sq.dma_addr); + return -ENOMEM; +} + +static void ering_init(struct eea_ring *ering) +{ + ering->cq.phase = EEA_RING_DESC_F_CQ_PHASE; + ering->num_free = ering->num; +} + +struct eea_ring *ering_alloc(u32 index, u32 num, struct eea_device *edev, + u8 sq_desc_size, u8 cq_desc_size, + const char *name) +{ + struct eea_ring *ering; + + ering = kzalloc(sizeof(*ering), GFP_KERNEL); + if (!ering) + return NULL; + + ering->edev = edev; + ering->name = name; + ering->index = index; + ering->msix_vec = index / 2 + 1; /* vec 0 is for error notify. */ + + if (ering_alloc_queues(ering, edev, num, sq_desc_size, cq_desc_size)) + goto err_free; + + ering_init(ering); + + return ering; + +err_free: + kfree(ering); + return NULL; +} + +void ering_free(struct eea_ring *ering) +{ + ering_free_queue(ering->edev, ering->cq.dma_size, + ering->cq.desc, ering->cq.dma_addr); + + ering_free_queue(ering->edev, ering->sq.dma_size, + ering->sq.desc, ering->sq.dma_addr); + + kfree(ering); +} diff --git a/drivers/net/ethernet/alibaba/eea/eea_ring.h b/drivers/net/ethernet/alibaba/eea/eea_ring.h new file mode 100644 index 0000000000000..ea7adc32bb23c --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_ring.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#ifndef __EEA_RING_H__ +#define __EEA_RING_H__ + +#include +#include "eea_desc.h" + +#define EEA_RING_DESC_F_MORE BIT(0) +#define EEA_RING_DESC_F_CQ_PHASE BIT(7) + +struct eea_common_desc { + __le16 flags; + __le16 id; +}; + +struct eea_device; + +struct eea_ring_sq { + void *desc; + + u16 head; + u16 hw_idx; + + u16 shadow_idx; + __le16 shadow_id; + u16 shadow_num; + + u8 desc_size; + u8 desc_size_shift; + + dma_addr_t dma_addr; + u32 dma_size; +}; + +struct eea_ring_cq { + void *desc; + + u16 head; + u16 hw_idx; + + u8 phase; + u8 desc_size_shift; + u8 desc_size; + + dma_addr_t dma_addr; + u32 dma_size; +}; + +struct eea_ring { + const char *name; + struct eea_device *edev; + u32 index; + void __iomem *db; + u16 msix_vec; + + u8 mask; + + u32 num; + + u32 num_free; + + struct eea_ring_sq sq; + struct eea_ring_cq cq; + + char irq_name[32]; +}; + +struct eea_ring *ering_alloc(u32 index, u32 num, struct eea_device *edev, + u8 sq_desc_size, u8 cq_desc_size, + const char *name); +void ering_free(struct eea_ring *ering); +bool ering_kick(struct eea_ring *ering); + +void *ering_sq_alloc_desc(struct eea_ring *ering, u16 id, + bool is_last, u16 flags); +void *ering_aq_alloc_desc(struct eea_ring *ering); +void ering_sq_commit_desc(struct eea_ring *ering); +void ering_sq_cancel(struct eea_ring *ering); + +void ering_cq_ack_desc(struct eea_ring *ering, u32 num); + +void ering_irq_unactive(struct eea_ring *ering); +void ering_irq_active(struct eea_ring *ering, struct eea_ring *tx_ering); +void *ering_cq_get_desc(const struct eea_ring *ering); +#endif From 1ad8b848520ba2411c686bcda2f382c8daad77bc Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 10 Nov 2025 19:46:46 +0800 Subject: [PATCH 772/867] eea: probe the netdevice and create adminq Add basic driver framework for the Alibaba Elastic Ethernet Adapter(EEA). This commit creates and registers the netdevice after PCI probe, and initializes the admin queue to send commands to the device. 
Reviewed-by: Dust Li Reviewed-by: Philo Lu Signed-off-by: Wen Gu Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- drivers/net/ethernet/alibaba/eea/Makefile | 6 +- drivers/net/ethernet/alibaba/eea/eea_adminq.c | 421 ++++++++++++++++++ drivers/net/ethernet/alibaba/eea/eea_adminq.h | 70 +++ drivers/net/ethernet/alibaba/eea/eea_net.c | 193 ++++++++ drivers/net/ethernet/alibaba/eea/eea_net.h | 143 ++++++ drivers/net/ethernet/alibaba/eea/eea_pci.c | 24 +- drivers/net/ethernet/alibaba/eea/eea_pci.h | 3 + 7 files changed, 857 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/alibaba/eea/eea_adminq.c create mode 100644 drivers/net/ethernet/alibaba/eea/eea_adminq.h create mode 100644 drivers/net/ethernet/alibaba/eea/eea_net.c create mode 100644 drivers/net/ethernet/alibaba/eea/eea_net.h diff --git a/drivers/net/ethernet/alibaba/eea/Makefile b/drivers/net/ethernet/alibaba/eea/Makefile index e5e4007810a65..91f318e8e0464 100644 --- a/drivers/net/ethernet/alibaba/eea/Makefile +++ b/drivers/net/ethernet/alibaba/eea/Makefile @@ -1,4 +1,6 @@ obj-$(CONFIG_EEA) += eea.o -eea-y := eea_ring.o \ - eea_pci.o +eea-y := eea_ring.o \ + eea_net.o \ + eea_pci.o \ + eea_adminq.o diff --git a/drivers/net/ethernet/alibaba/eea/eea_adminq.c b/drivers/net/ethernet/alibaba/eea/eea_adminq.c new file mode 100644 index 0000000000000..99b05357649eb --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_adminq.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#include +#include +#include +#include + +#include "eea_adminq.h" +#include "eea_net.h" +#include "eea_pci.h" +#include "eea_ring.h" + +#define EEA_AQ_CMD_CFG_QUERY ((0 << 8) | 0) + +#define EEA_AQ_CMD_QUEUE_CREATE ((1 << 8) | 0) +#define EEA_AQ_CMD_QUEUE_DESTROY_ALL ((1 << 8) | 1) + +#define EEA_AQ_CMD_HOST_INFO ((2 << 8) | 0) + +#define EEA_AQ_CMD_DEV_STATUS ((3 << 8) | 0) + +#define EEA_RING_DESC_F_AQ_PHASE (BIT(15) | BIT(7)) + +#define EEA_QUEUE_FLAGS_HW_SPLIT_HDR BIT(0) +#define EEA_QUEUE_FLAGS_SQCQ BIT(1) +#define EEA_QUEUE_FLAGS_HWTS BIT(2) + +struct eea_aq_create { + __le32 flags; + /* queue index. + * rx: 0 == qidx % 2 + * tx: 1 == qidx % 2 + */ + __le16 qidx; + /* the depth of the queue */ + __le16 depth; + /* 0: without SPLIT HDR + * 1: 128B + * 2: 256B + * 3: 512B + */ + u8 hdr_buf_size; + u8 sq_desc_size; + u8 cq_desc_size; + u8 reserve0; + /* The vector for the irq. rx,tx share the same vector */ + __le16 msix_vector; + __le16 reserve; + /* sq ring cfg. */ + __le32 sq_addr_low; + __le32 sq_addr_high; + /* cq ring cfg. Just valid when flags include EEA_QUEUE_FLAGS_SQCQ. 
*/ + __le32 cq_addr_low; + __le32 cq_addr_high; +}; + +struct eea_aq_queue_drv_status { + __le16 qidx; + + __le16 sq_head; + __le16 cq_head; + __le16 reserved; +}; + +#define EEA_OS_DISTRO 0 +#define EEA_DRV_TYPE 0 +#define EEA_OS_LINUX 1 +#define EEA_SPEC_VER_MAJOR 1 +#define EEA_SPEC_VER_MINOR 0 + +struct eea_aq_host_info_cfg { + __le16 os_type; + __le16 os_dist; + __le16 drv_type; + + __le16 kern_ver_major; + __le16 kern_ver_minor; + __le16 kern_ver_sub_minor; + + __le16 drv_ver_major; + __le16 drv_ver_minor; + __le16 drv_ver_sub_minor; + + __le16 spec_ver_major; + __le16 spec_ver_minor; + __le16 pci_bdf; + __le32 pci_domain; + + u8 os_ver_str[64]; + u8 isa_str[64]; +}; + +#define EEA_HINFO_MAX_REP_LEN 1024 +#define EEA_HINFO_REP_REJECT 2 + +struct eea_aq_host_info_rep { + u8 op_code; + u8 has_reply; + u8 reply_str[EEA_HINFO_MAX_REP_LEN]; +}; + +static struct eea_ring *qid_to_ering(struct eea_net *enet, u32 qid) +{ + struct eea_ring *ering; + + if (qid % 2 == 0) + ering = enet->rx[qid / 2]->ering; + else + ering = enet->tx[qid / 2].ering; + + return ering; +} + +#define EEA_AQ_TIMEOUT_US (60 * 1000 * 1000) + +static int eea_adminq_submit(struct eea_net *enet, u16 cmd, + dma_addr_t req_addr, dma_addr_t res_addr, + u32 req_size, u32 res_size) +{ + struct eea_aq_cdesc *cdesc; + struct eea_aq_desc *desc; + int ret; + + desc = ering_aq_alloc_desc(enet->adminq.ring); + + desc->classid = cmd >> 8; + desc->command = cmd & 0xff; + + desc->data_addr = cpu_to_le64(req_addr); + desc->data_len = cpu_to_le32(req_size); + + desc->reply_addr = cpu_to_le64(res_addr); + desc->reply_len = cpu_to_le32(res_size); + + /* for update flags */ + wmb(); + + desc->flags = cpu_to_le16(enet->adminq.phase); + + ering_sq_commit_desc(enet->adminq.ring); + + ering_kick(enet->adminq.ring); + + ++enet->adminq.num; + + if ((enet->adminq.num % enet->adminq.ring->num) == 0) + enet->adminq.phase ^= EEA_RING_DESC_F_AQ_PHASE; + + ret = read_poll_timeout(ering_cq_get_desc, cdesc, cdesc, 0, + 
EEA_AQ_TIMEOUT_US, false, enet->adminq.ring); + if (ret) + return ret; + + ret = le32_to_cpu(cdesc->status); + + ering_cq_ack_desc(enet->adminq.ring, 1); + + if (ret) + netdev_err(enet->netdev, + "adminq exec failed. cmd: %d ret %d\n", cmd, ret); + + return ret ? -EIO : 0; +} + +static int eea_adminq_exec(struct eea_net *enet, u16 cmd, + void *req, u32 req_size, void *res, u32 res_size) +{ + dma_addr_t req_addr = 0, res_addr = 0; + struct device *dma; + int ret; + + dma = enet->edev->dma_dev; + + if (req) { + req_addr = dma_map_single(dma, req, req_size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dma, req_addr))) + return -ENOMEM; + } + + if (res) { + res_addr = dma_map_single(dma, res, res_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dma, res_addr))) { + ret = -ENOMEM; + goto err_unmap_req; + } + } + + ret = eea_adminq_submit(enet, cmd, req_addr, res_addr, + req_size, res_size); + if (res) + dma_unmap_single(dma, res_addr, res_size, DMA_FROM_DEVICE); + +err_unmap_req: + if (req) + dma_unmap_single(dma, req_addr, req_size, DMA_TO_DEVICE); + + return ret; +} + +void eea_destroy_adminq(struct eea_net *enet) +{ + if (enet->adminq.ring) { + ering_free(enet->adminq.ring); + enet->adminq.ring = NULL; + enet->adminq.phase = 0; + } +} + +int eea_create_adminq(struct eea_net *enet, u32 qid) +{ + struct eea_ring *ering; + + ering = ering_alloc(qid, 64, enet->edev, sizeof(struct eea_aq_desc), + sizeof(struct eea_aq_cdesc), "adminq"); + if (!ering) + return -ENOMEM; + + eea_pci_active_aq(ering); + + enet->adminq.ring = ering; + enet->adminq.phase = BIT(7); + enet->adminq.num = 0; + + /* set device ready to active adminq */ + eea_device_ready(enet->edev); + + return 0; +} + +int eea_adminq_query_cfg(struct eea_net *enet, struct eea_aq_cfg *cfg) +{ + return eea_adminq_exec(enet, EEA_AQ_CMD_CFG_QUERY, NULL, 0, cfg, + sizeof(*cfg)); +} + +static void qcfg_fill(struct eea_aq_create *qcfg, struct eea_ring *ering, + u32 flags) +{ + qcfg->flags = cpu_to_le32(flags); + qcfg->qidx 
= cpu_to_le16(ering->index); + qcfg->depth = cpu_to_le16(ering->num); + + qcfg->hdr_buf_size = flags & EEA_QUEUE_FLAGS_HW_SPLIT_HDR ? 1 : 0; + qcfg->sq_desc_size = ering->sq.desc_size; + qcfg->cq_desc_size = ering->cq.desc_size; + qcfg->msix_vector = cpu_to_le16(ering->msix_vec); + + qcfg->sq_addr_low = cpu_to_le32(lower_32_bits(ering->sq.dma_addr)); + qcfg->sq_addr_high = cpu_to_le32(upper_32_bits(ering->sq.dma_addr)); + + qcfg->cq_addr_low = cpu_to_le32(lower_32_bits(ering->cq.dma_addr)); + qcfg->cq_addr_high = cpu_to_le32(upper_32_bits(ering->cq.dma_addr)); +} + +int eea_adminq_create_q(struct eea_net *enet, u32 qidx, u32 num, u32 flags) +{ + int i, db_size, q_size, qid, err = -ENOMEM; + struct eea_aq_create *q_buf; + struct eea_net_cfg *cfg; + struct eea_ring *ering; + __le32 *db_buf; + + cfg = &enet->cfg; + + if (cfg->split_hdr) + flags |= EEA_QUEUE_FLAGS_HW_SPLIT_HDR; + + flags |= EEA_QUEUE_FLAGS_SQCQ; + flags |= EEA_QUEUE_FLAGS_HWTS; + + db_size = sizeof(*db_buf) * num; + q_size = sizeof(struct eea_aq_create) * num; + + /* eea_adminq_exec() dma-maps this, so use kmalloc memory */ + db_buf = kzalloc(db_size, GFP_KERNEL); + if (!db_buf) + return err; + + /* dma-mapped by eea_adminq_exec() as well */ + q_buf = kzalloc(q_size, GFP_KERNEL); + if (!q_buf) + goto err_free_db_buf; + + qid = qidx; + for (i = 0; i < num; i++, qid++) + qcfg_fill(q_buf + i, qid_to_ering(enet, qid), flags); + + err = eea_adminq_exec(enet, EEA_AQ_CMD_QUEUE_CREATE, + q_buf, q_size, db_buf, db_size); + if (err) + goto err_free_q_buf; + + qid = qidx; + for (i = 0; i < num; i++, qid++) { + ering = qid_to_ering(enet, qid); + ering->db = eea_pci_db_addr(ering->edev, + le32_to_cpu(db_buf[i])); + } + +err_free_q_buf: + kfree(q_buf); + +err_free_db_buf: + kfree(db_buf); + + return err; +} + +int eea_adminq_destroy_all_q(struct eea_net *enet) +{ + return eea_adminq_exec(enet, EEA_AQ_CMD_QUEUE_DESTROY_ALL, NULL, 0, + NULL, 0); +} + +struct eea_aq_dev_status 
*eea_adminq_dev_status(struct eea_net *enet) +{ + struct eea_aq_queue_drv_status *drv_status; + struct eea_aq_dev_status *dev_status; + struct eea_ring *ering; + int err, i, num, size; + void *rep, *req; + + num = enet->cfg.tx_ring_num * 2 + 1; + + req = kcalloc(num, sizeof(struct eea_aq_queue_drv_status), GFP_KERNEL); + if (!req) + return NULL; + + size = struct_size(dev_status, q_status, num); + + rep = kmalloc(size, GFP_KERNEL); + if (!rep) { + kfree(req); + return NULL; + } + + drv_status = req; + for (i = 0; i < enet->cfg.rx_ring_num * 2; ++i, ++drv_status) { + ering = qid_to_ering(enet, i); + drv_status->qidx = cpu_to_le16(i); + drv_status->cq_head = cpu_to_le16(ering->cq.head); + drv_status->sq_head = cpu_to_le16(ering->sq.head); + } + + drv_status->qidx = cpu_to_le16(i); + drv_status->cq_head = cpu_to_le16(enet->adminq.ring->cq.head); + drv_status->sq_head = cpu_to_le16(enet->adminq.ring->sq.head); + + err = eea_adminq_exec(enet, EEA_AQ_CMD_DEV_STATUS, + req, num * sizeof(struct eea_aq_queue_drv_status), + rep, size); + kfree(req); + if (err) { + kfree(rep); + return NULL; + } + + return rep; +} + +int eea_adminq_config_host_info(struct eea_net *enet) +{ + struct device *dev = enet->edev->dma_dev; + struct eea_aq_host_info_cfg *cfg; + struct eea_aq_host_info_rep *rep; + int rc = -ENOMEM; + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return rc; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + goto err_free_cfg; + + cfg->os_type = cpu_to_le16(EEA_OS_LINUX); + cfg->os_dist = cpu_to_le16(EEA_OS_DISTRO); + cfg->drv_type = cpu_to_le16(EEA_DRV_TYPE); + + cfg->kern_ver_major = cpu_to_le16(LINUX_VERSION_MAJOR); + cfg->kern_ver_minor = cpu_to_le16(LINUX_VERSION_PATCHLEVEL); + cfg->kern_ver_sub_minor = cpu_to_le16(LINUX_VERSION_SUBLEVEL); + + cfg->drv_ver_major = cpu_to_le16(EEA_VER_MAJOR); + cfg->drv_ver_minor = cpu_to_le16(EEA_VER_MINOR); + cfg->drv_ver_sub_minor = cpu_to_le16(EEA_VER_SUB_MINOR); + + cfg->spec_ver_major = 
cpu_to_le16(EEA_SPEC_VER_MAJOR); + cfg->spec_ver_minor = cpu_to_le16(EEA_SPEC_VER_MINOR); + + cfg->pci_bdf = cpu_to_le16(eea_pci_dev_id(enet->edev)); + cfg->pci_domain = cpu_to_le32(eea_pci_domain_nr(enet->edev)); + + strscpy(cfg->os_ver_str, utsname()->release, sizeof(cfg->os_ver_str)); + strscpy(cfg->isa_str, utsname()->machine, sizeof(cfg->isa_str)); + + rc = eea_adminq_exec(enet, EEA_AQ_CMD_HOST_INFO, + cfg, sizeof(*cfg), rep, sizeof(*rep)); + + if (!rc) { + if (rep->op_code == EEA_HINFO_REP_REJECT) { + dev_err(dev, "Device has refused the initialization due to provided host information\n"); + rc = -ENODEV; + } + if (rep->has_reply) { + rep->reply_str[EEA_HINFO_MAX_REP_LEN - 1] = '\0'; + dev_warn(dev, "Device replied: %s\n", + rep->reply_str); + } + } + + kfree(rep); +err_free_cfg: + kfree(cfg); + return rc; +} diff --git a/drivers/net/ethernet/alibaba/eea/eea_adminq.h b/drivers/net/ethernet/alibaba/eea/eea_adminq.h new file mode 100644 index 0000000000000..dce65967cc17b --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_adminq.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#include "eea_pci.h" + +#ifndef __EEA_ADMINQ_H__ +#define __EEA_ADMINQ_H__ + +struct eea_aq_cfg { + __le32 rx_depth_max; + __le32 rx_depth_def; + + __le32 tx_depth_max; + __le32 tx_depth_def; + + __le32 max_tso_size; + __le32 max_tso_segs; + + u8 mac[ETH_ALEN]; + __le16 status; + + __le16 mtu; + __le16 reserved0; + __le16 reserved1; + u8 reserved2; + u8 reserved3; + + __le16 reserved4; + __le16 reserved5; + __le16 reserved6; +}; + +struct eea_aq_queue_status { + __le16 qidx; +#define EEA_QUEUE_STATUS_OK 0 +#define EEA_QUEUE_STATUS_NEED_RESET 1 + __le16 status; +}; + +struct eea_aq_dev_status { +#define EEA_LINK_DOWN_STATUS 0 +#define EEA_LINK_UP_STATUS 1 + __le16 link_status; + __le16 reserved; + + struct eea_aq_queue_status q_status[]; +}; + +struct eea_aq { + struct eea_ring *ring; + u32 num; + u16 phase; +}; + +struct eea_net; + +int eea_create_adminq(struct eea_net *enet, u32 qid); +void eea_destroy_adminq(struct eea_net *enet); + +int eea_adminq_query_cfg(struct eea_net *enet, struct eea_aq_cfg *cfg); + +int eea_adminq_create_q(struct eea_net *enet, u32 qidx, u32 num, u32 flags); +int eea_adminq_destroy_all_q(struct eea_net *enet); +struct eea_aq_dev_status *eea_adminq_dev_status(struct eea_net *enet); +int eea_adminq_config_host_info(struct eea_net *enet); +#endif diff --git a/drivers/net/ethernet/alibaba/eea/eea_net.c b/drivers/net/ethernet/alibaba/eea/eea_net.c new file mode 100644 index 0000000000000..6fdb5edfc95e5 --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_net.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#include +#include +#include +#include +#include + +#include "eea_adminq.h" +#include "eea_net.h" +#include "eea_pci.h" +#include "eea_ring.h" + +#define EEA_SPLIT_HDR_SIZE 128 + +static void eea_update_cfg(struct eea_net *enet, + struct eea_device *edev, + struct eea_aq_cfg *hwcfg) +{ + enet->cfg_hw.rx_ring_depth = le32_to_cpu(hwcfg->rx_depth_max); + enet->cfg_hw.tx_ring_depth = le32_to_cpu(hwcfg->tx_depth_max); + + enet->cfg_hw.rx_ring_num = edev->rx_num; + enet->cfg_hw.tx_ring_num = edev->tx_num; + + enet->cfg.rx_ring_depth = le32_to_cpu(hwcfg->rx_depth_def); + enet->cfg.tx_ring_depth = le32_to_cpu(hwcfg->tx_depth_def); + + enet->cfg.rx_ring_num = edev->rx_num; + enet->cfg.tx_ring_num = edev->tx_num; + + enet->cfg_hw.split_hdr = EEA_SPLIT_HDR_SIZE; +} + +static int eea_netdev_init_features(struct net_device *netdev, + struct eea_net *enet, + struct eea_device *edev) +{ + struct eea_aq_cfg *cfg; + int err; + u32 mtu; + + cfg = kmalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = eea_adminq_query_cfg(enet, cfg); + if (err) + goto err_free; + + mtu = le16_to_cpu(cfg->mtu); + if (mtu < ETH_MIN_MTU) { + dev_err(edev->dma_dev, "The device gave us an invalid MTU. Here we can only exit the initialization. 
%d < %d", + mtu, ETH_MIN_MTU); + err = -EINVAL; + goto err_free; + } + + eea_update_cfg(enet, edev, cfg); + + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + netdev->hw_features |= NETIF_F_HW_CSUM; + netdev->hw_features |= NETIF_F_GRO_HW; + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO_ECN; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_GSO_UDP_L4; + + netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_CSUM; + netdev->features |= NETIF_F_SG; + netdev->features |= NETIF_F_GSO_ROBUST; + netdev->features |= netdev->hw_features & NETIF_F_ALL_TSO; + netdev->features |= NETIF_F_RXCSUM; + netdev->features |= NETIF_F_GRO_HW; + + netdev->vlan_features = netdev->features; + + eth_hw_addr_set(netdev, cfg->mac); + + enet->speed = SPEED_UNKNOWN; + enet->duplex = DUPLEX_UNKNOWN; + + netdev->min_mtu = ETH_MIN_MTU; + + netdev->mtu = mtu; + + /* If jumbo frames are already enabled, then the returned MTU will be a + * jumbo MTU, and the driver will automatically enable jumbo frame + * support by default. 
+ */ + netdev->max_mtu = mtu; + + netif_carrier_on(netdev); + +err_free: + kfree(cfg); + return err; +} + +static const struct net_device_ops eea_netdev = { + .ndo_validate_addr = eth_validate_addr, + .ndo_features_check = passthru_features_check, +}; + +static struct eea_net *eea_netdev_alloc(struct eea_device *edev, u32 pairs) +{ + struct net_device *netdev; + struct eea_net *enet; + + netdev = alloc_etherdev_mq(sizeof(struct eea_net), pairs); + if (!netdev) { + dev_err(edev->dma_dev, + "alloc_etherdev_mq failed with pairs %d\n", pairs); + return NULL; + } + + netdev->netdev_ops = &eea_netdev; + SET_NETDEV_DEV(netdev, edev->dma_dev); + + enet = netdev_priv(netdev); + enet->netdev = netdev; + enet->edev = edev; + edev->enet = enet; + + return enet; +} + +int eea_net_probe(struct eea_device *edev) +{ + struct eea_net *enet; + int err = -ENOMEM; + + enet = eea_netdev_alloc(edev, edev->rx_num); + if (!enet) + return -ENOMEM; + + err = eea_create_adminq(enet, edev->rx_num + edev->tx_num); + if (err) + goto err_free_netdev; + + err = eea_adminq_config_host_info(enet); + if (err) + goto err_reset_dev; + + err = eea_netdev_init_features(enet->netdev, enet, edev); + if (err) + goto err_reset_dev; + + err = register_netdev(enet->netdev); + if (err) + goto err_reset_dev; + + netif_carrier_off(enet->netdev); + + netdev_dbg(enet->netdev, "eea probe success.\n"); + + return 0; + +err_reset_dev: + eea_device_reset(edev); + eea_destroy_adminq(enet); + +err_free_netdev: + free_netdev(enet->netdev); + return err; +} + +void eea_net_remove(struct eea_device *edev) +{ + struct net_device *netdev; + struct eea_net *enet; + + enet = edev->enet; + netdev = enet->netdev; + + unregister_netdev(netdev); + netdev_dbg(enet->netdev, "eea removed.\n"); + + eea_device_reset(edev); + + eea_destroy_adminq(enet); + + free_netdev(netdev); +} diff --git a/drivers/net/ethernet/alibaba/eea/eea_net.h b/drivers/net/ethernet/alibaba/eea/eea_net.h new file mode 100644 index 0000000000000..b35d7483de634 
--- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_net.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. + */ + +#ifndef __EEA_NET_H__ +#define __EEA_NET_H__ + +#include +#include + +#include "eea_adminq.h" +#include "eea_ring.h" + +#define EEA_VER_MAJOR 1 +#define EEA_VER_MINOR 0 +#define EEA_VER_SUB_MINOR 0 + +struct eea_net_tx { + struct eea_net *enet; + + struct eea_ring *ering; + + struct eea_tx_meta *meta; + struct eea_tx_meta *free; + + struct device *dma_dev; + + u32 index; + + char name[16]; +}; + +struct eea_rx_meta { + struct eea_rx_meta *next; + + struct page *page; + dma_addr_t dma; + u32 offset; + u32 frags; + + struct page *hdr_page; + void *hdr_addr; + dma_addr_t hdr_dma; + + u32 id; + + u32 truesize; + u32 headroom; + u32 tailroom; + u32 room; + + u32 len; +}; + +struct eea_net_rx_pkt_ctx { + u16 idx; + + bool data_valid; + bool do_drop; + + struct sk_buff *head_skb; + struct sk_buff *curr_skb; +}; + +struct eea_net_rx { + struct eea_net *enet; + + struct eea_ring *ering; + + struct eea_rx_meta *meta; + struct eea_rx_meta *free; + + struct device *dma_dev; + + u32 index; + + u32 flags; + + u32 headroom; + + struct napi_struct napi; + + u16 irq_n; + + char name[16]; + + struct eea_net_rx_pkt_ctx pkt; + + struct page_pool *pp; +}; + +struct eea_net_cfg { + u32 rx_ring_depth; + u32 tx_ring_depth; + u32 rx_ring_num; + u32 tx_ring_num; + + u8 rx_sq_desc_size; + u8 rx_cq_desc_size; + u8 tx_sq_desc_size; + u8 tx_cq_desc_size; + + u32 split_hdr; +}; + +enum { + EEA_LINK_ERR_NONE, + EEA_LINK_ERR_HA_RESET_DEV, + EEA_LINK_ERR_LINK_DOWN, +}; + +struct eea_net { + struct eea_device *edev; + struct net_device *netdev; + + struct eea_aq adminq; + + struct eea_net_tx *tx; + struct eea_net_rx **rx; + + struct eea_net_cfg cfg; + struct eea_net_cfg cfg_hw; + + u32 link_err; + + bool started; + bool cpu_aff_set; + + u8 duplex; + u32 speed; + + u64 
hw_ts_offset; +}; + +int eea_net_probe(struct eea_device *edev); +void eea_net_remove(struct eea_device *edev); +int eea_net_freeze(struct eea_device *edev); +int eea_net_restore(struct eea_device *edev); + +#endif diff --git a/drivers/net/ethernet/alibaba/eea/eea_pci.c b/drivers/net/ethernet/alibaba/eea/eea_pci.c index 5785d9b3fd309..ce320c3953a88 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_pci.c +++ b/drivers/net/ethernet/alibaba/eea/eea_pci.c @@ -8,6 +8,7 @@ #include #include +#include "eea_net.h" #include "eea_pci.h" #define EEA_PCI_DB_OFFSET 4096 @@ -58,7 +59,9 @@ struct eea_pci_device { ((void __iomem *)((reg) + offsetof(struct eea_pci_cfg, item))) #define cfg_write8(reg, item, val) iowrite8(val, cfg_pointer(reg, item)) +#define cfg_write16(reg, item, val) iowrite16(val, cfg_pointer(reg, item)) #define cfg_write32(reg, item, val) iowrite32(val, cfg_pointer(reg, item)) +#define cfg_write64(reg, item, val) iowrite64_lo_hi(val, cfg_pointer(reg, item)) #define cfg_read8(reg, item) ioread8(cfg_pointer(reg, item)) #define cfg_read32(reg, item) ioread32(cfg_pointer(reg, item)) @@ -231,6 +234,20 @@ void __iomem *eea_pci_db_addr(struct eea_device *edev, u32 off) return edev->ep_dev->db_base + off; } +void eea_pci_active_aq(struct eea_ring *ering) +{ + struct eea_pci_device *ep_dev = ering->edev->ep_dev; + + cfg_write16(ep_dev->reg, aq_size, ering->num); + cfg_write16(ep_dev->reg, aq_msix_vector, ering->msix_vec); + + cfg_write64(ep_dev->reg, aq_sq_addr, ering->sq.dma_addr); + cfg_write64(ep_dev->reg, aq_cq_addr, ering->cq.dma_addr); + + ering->db = eea_pci_db_addr(ering->edev, + cfg_read32(ep_dev->reg, aq_db_off)); +} + u64 eea_pci_device_ts(struct eea_device *edev) { struct eea_pci_device *ep_dev = edev->ep_dev; @@ -252,7 +269,9 @@ static int eea_init_device(struct eea_device *edev) if (err) goto err; - /* do net device probe ... 
*/ + err = eea_net_probe(edev); + if (err) + goto err; return 0; err: @@ -286,6 +305,9 @@ static void __eea_pci_remove(struct pci_dev *pci_dev, bool flush_ha_work) { struct eea_pci_device *ep_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&ep_dev->pci_dev->dev); + struct eea_device *edev = &ep_dev->edev; + + eea_net_remove(edev); pci_disable_sriov(pci_dev); diff --git a/drivers/net/ethernet/alibaba/eea/eea_pci.h b/drivers/net/ethernet/alibaba/eea/eea_pci.h index 126704a207d53..d793128e556ca 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_pci.h +++ b/drivers/net/ethernet/alibaba/eea/eea_pci.h @@ -10,6 +10,8 @@ #include +#include "eea_ring.h" + struct eea_pci_cap { __u8 cap_vndr; __u8 cap_next; @@ -43,6 +45,7 @@ u16 eea_pci_dev_id(struct eea_device *edev); int eea_device_reset(struct eea_device *dev); void eea_device_ready(struct eea_device *dev); +void eea_pci_active_aq(struct eea_ring *ering); u64 eea_pci_device_ts(struct eea_device *edev); From 50ef373f491227930dc8c679aa35d20b044d2897 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 10 Nov 2025 19:46:47 +0800 Subject: [PATCH 773/867] eea: create/destroy rx,tx queues for netdevice open and stop Add basic driver framework for the Alibaba Elastic Ethernet Adapter(EEA). This commit introduces the implementation for the netdevice open and stop. 
Reviewed-by: Dust Li Reviewed-by: Philo Lu Signed-off-by: Wen Gu Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- drivers/net/ethernet/alibaba/eea/Makefile | 4 +- drivers/net/ethernet/alibaba/eea/eea_net.c | 387 ++++++++++- drivers/net/ethernet/alibaba/eea/eea_net.h | 48 ++ drivers/net/ethernet/alibaba/eea/eea_pci.c | 178 +++++ drivers/net/ethernet/alibaba/eea/eea_pci.h | 14 + drivers/net/ethernet/alibaba/eea/eea_rx.c | 764 +++++++++++++++++++++ drivers/net/ethernet/alibaba/eea/eea_tx.c | 380 ++++++++++ 7 files changed, 1771 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/alibaba/eea/eea_rx.c create mode 100644 drivers/net/ethernet/alibaba/eea/eea_tx.c diff --git a/drivers/net/ethernet/alibaba/eea/Makefile b/drivers/net/ethernet/alibaba/eea/Makefile index 91f318e8e0464..fa34a005fa01a 100644 --- a/drivers/net/ethernet/alibaba/eea/Makefile +++ b/drivers/net/ethernet/alibaba/eea/Makefile @@ -3,4 +3,6 @@ obj-$(CONFIG_EEA) += eea.o eea-y := eea_ring.o \ eea_net.o \ eea_pci.o \ - eea_adminq.o + eea_adminq.o \ + eea_tx.o \ + eea_rx.o diff --git a/drivers/net/ethernet/alibaba/eea/eea_net.c b/drivers/net/ethernet/alibaba/eea/eea_net.c index 6fdb5edfc95e5..dc5895f5d4954 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_net.c +++ b/drivers/net/ethernet/alibaba/eea/eea_net.c @@ -18,6 +18,332 @@ #define EEA_SPLIT_HDR_SIZE 128 +static void enet_bind_new_q_and_cfg(struct eea_net *enet, + struct eea_net_init_ctx *ctx) +{ + struct eea_net_rx *rx; + struct eea_net_tx *tx; + int i; + + enet->cfg = ctx->cfg; + + enet->rx = ctx->rx; + enet->tx = ctx->tx; + + for (i = 0; i < ctx->cfg.rx_ring_num; i++) { + rx = ctx->rx[i]; + tx = &ctx->tx[i]; + + rx->enet = enet; + tx->enet = enet; + } +} + +void enet_init_ctx(struct eea_net *enet, struct eea_net_init_ctx *ctx) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->netdev = enet->netdev; + ctx->edev = enet->edev; + ctx->cfg = enet->cfg; +} + +static void eea_free_rxtx_q_mem(struct eea_net *enet) +{ + struct eea_net_rx 
*rx; + struct eea_net_tx *tx; + int i; + + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + rx = enet->rx[i]; + tx = &enet->tx[i]; + + eea_free_rx(rx); + eea_free_tx(tx); + } + + /* We called __netif_napi_del(), + * we need to respect an RCU grace period before freeing enet->rx + */ + synchronize_net(); + + kvfree(enet->rx); + kvfree(enet->tx); + + enet->rx = NULL; + enet->tx = NULL; +} + +/* alloc tx/rx: struct, ring, meta, pp, napi */ +static int eea_alloc_rxtx_q_mem(struct eea_net_init_ctx *ctx) +{ + struct eea_net_rx *rx; + struct eea_net_tx *tx; + int err, i; + + ctx->tx = kvcalloc(ctx->cfg.tx_ring_num, sizeof(*ctx->tx), GFP_KERNEL); + if (!ctx->tx) + return -ENOMEM; + + ctx->rx = kvcalloc(ctx->cfg.rx_ring_num, sizeof(*ctx->rx), GFP_KERNEL); + if (!ctx->rx) + goto err_free_tx; + + ctx->cfg.rx_sq_desc_size = sizeof(struct eea_rx_desc); + ctx->cfg.rx_cq_desc_size = sizeof(struct eea_rx_cdesc); + ctx->cfg.tx_sq_desc_size = sizeof(struct eea_tx_desc); + ctx->cfg.tx_cq_desc_size = sizeof(struct eea_tx_cdesc); + + ctx->cfg.tx_cq_desc_size /= 2; + + if (!ctx->cfg.split_hdr) + ctx->cfg.rx_sq_desc_size /= 2; + + for (i = 0; i < ctx->cfg.rx_ring_num; i++) { + rx = eea_alloc_rx(ctx, i); + if (!rx) + goto err_free; + + ctx->rx[i] = rx; + + tx = ctx->tx + i; + err = eea_alloc_tx(ctx, tx, i); + if (err) + goto err_free; + } + + return 0; + +err_free: + for (i = 0; i < ctx->cfg.rx_ring_num; i++) { + rx = ctx->rx[i]; + tx = ctx->tx + i; + + eea_free_rx(rx); + eea_free_tx(tx); + } + + kvfree(ctx->rx); + +err_free_tx: + kvfree(ctx->tx); + return -ENOMEM; +} + +static int eea_active_ring_and_irq(struct eea_net *enet) +{ + int err; + + err = eea_adminq_create_q(enet, /* qidx = */ 0, + enet->cfg.rx_ring_num + + enet->cfg.tx_ring_num, 0); + if (err) + return err; + + err = enet_rxtx_irq_setup(enet, 0, enet->cfg.rx_ring_num); + if (err) { + eea_adminq_destroy_all_q(enet); + return err; + } + + return 0; +} + +static int eea_unactive_ring_and_irq(struct eea_net *enet) +{ + struct 
eea_net_rx *rx; + int err, i; + + err = eea_adminq_destroy_all_q(enet); + if (err) + netdev_warn(enet->netdev, "unactive rxtx ring failed.\n"); + + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + rx = enet->rx[i]; + eea_irq_free(rx); + } + + return err; +} + +/* stop rx napi, stop tx queue. */ +static int eea_stop_rxtx(struct net_device *netdev) +{ + struct eea_net *enet = netdev_priv(netdev); + int i; + + netif_tx_disable(netdev); + + for (i = 0; i < enet->cfg.rx_ring_num; i++) + enet_rx_stop(enet->rx[i]); + + netif_carrier_off(netdev); + + return 0; +} + +static int eea_start_rxtx(struct net_device *netdev) +{ + struct eea_net *enet = netdev_priv(netdev); + int i, err; + + err = netif_set_real_num_queues(netdev, enet->cfg.tx_ring_num, + enet->cfg.rx_ring_num); + if (err) + return err; + + for (i = 0; i < enet->cfg.rx_ring_num; i++) + enet_rx_start(enet->rx[i]); + + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + + enet->started = true; + + return 0; +} + +static int eea_netdev_stop(struct net_device *netdev) +{ + struct eea_net *enet = netdev_priv(netdev); + + /* This function can be called during device anomaly recovery. To + * prevent duplicate stop operations, the `started` flag is introduced + * for checking. 
+ */ + + if (!enet->started) { + netdev_warn(netdev, "eea netdev stop: but dev is not started.\n"); + return 0; + } + + eea_stop_rxtx(netdev); + eea_unactive_ring_and_irq(enet); + eea_free_rxtx_q_mem(enet); + + enet->started = false; + + return 0; +} + +/* ndo_open callback. + * + * Refuses to open while enet->link_err is set (-EBUSY). Otherwise allocates + * the rx/tx queue memory, binds it to enet, activates the rings and irqs and + * starts rx/tx. + * + * Returns 0 on success or a negative errno. ndo_stop is not invoked after a + * failed ndo_open, so every failure path releases what was set up here. + */ +static int eea_netdev_open(struct net_device *netdev) +{ + struct eea_net *enet = netdev_priv(netdev); + struct eea_net_init_ctx ctx; + int err; + + if (enet->link_err) { + netdev_err(netdev, "netdev open err, because link error: %d\n", + enet->link_err); + return -EBUSY; + } + + enet_init_ctx(enet, &ctx); + + err = eea_alloc_rxtx_q_mem(&ctx); + if (err) + return err; + + enet_bind_new_q_and_cfg(enet, &ctx); + + err = eea_active_ring_and_irq(enet); + if (err) + goto err_free_q; + + err = eea_start_rxtx(netdev); + if (err) + goto err_unactive; + + return 0; + +err_unactive: + eea_unactive_ring_and_irq(enet); + +err_free_q: + /* The queues are already bound to enet, so free them through it. */ + eea_free_rxtx_q_mem(enet); + return err; +} + +/* resources: ring, buffers, irq */ +int eea_reset_hw_resources(struct eea_net *enet, struct eea_net_init_ctx *ctx) +{ + int err; + + if (!netif_running(enet->netdev)) { + enet->cfg = ctx->cfg; + return 0; + } + + err = eea_alloc_rxtx_q_mem(ctx); + if (err) { + netdev_warn(enet->netdev, + "eea reset: alloc q failed. stop reset. err %d\n", + err); + return err; + } + + eea_netdev_stop(enet->netdev); + + enet_bind_new_q_and_cfg(enet, ctx); + + err = eea_active_ring_and_irq(enet); + if (err) { + netdev_warn(enet->netdev, + "eea reset: active new ring and irq failed. err %d\n", + err); + return err; + } + + err = eea_start_rxtx(enet->netdev); + if (err) + netdev_warn(enet->netdev, + "eea reset: start queue failed. 
err %d\n", err); + + return err; +} + +int eea_queues_check_and_reset(struct eea_device *edev) +{ + struct eea_aq_queue_status *qstatus; + struct eea_aq_dev_status *dstatus; + struct eea_aq_queue_status *qs; + struct eea_net_init_ctx ctx; + bool need_reset = false; + int num, i, err = 0; + + rtnl_lock(); + + num = edev->enet->cfg.tx_ring_num * 2 + 1; + + dstatus = eea_adminq_dev_status(edev->enet); + if (!dstatus) { + netdev_warn(edev->enet->netdev, "query queue status failed.\n"); + err = -ENOMEM; + goto err_unlock; + } + + if (le16_to_cpu(dstatus->link_status) == EEA_LINK_DOWN_STATUS) { + eea_netdev_stop(edev->enet->netdev); + edev->enet->link_err = EEA_LINK_ERR_LINK_DOWN; + netdev_warn(edev->enet->netdev, "device link is down. stop device.\n"); + goto err_free; + } + + qstatus = dstatus->q_status; + + for (i = 0; i < num; ++i) { + qs = &qstatus[i]; + + if (le16_to_cpu(qs->status) == EEA_QUEUE_STATUS_NEED_RESET) { + netdev_warn(edev->enet->netdev, + "queue status: queue %u needs to reset\n", + le16_to_cpu(qs->qidx)); + need_reset = true; + } + } + + if (need_reset) { + enet_init_ctx(edev->enet, &ctx); + err = eea_reset_hw_resources(edev->enet, &ctx); + } + +err_free: + kfree(dstatus); + +err_unlock: + rtnl_unlock(); + return err; +} + static void eea_update_cfg(struct eea_net *enet, struct eea_device *edev, struct eea_aq_cfg *hwcfg) @@ -107,8 +433,12 @@ static int eea_netdev_init_features(struct net_device *netdev, } static const struct net_device_ops eea_netdev = { + .ndo_open = eea_netdev_open, + .ndo_stop = eea_netdev_stop, + .ndo_start_xmit = eea_tx_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_features_check = passthru_features_check, + .ndo_tx_timeout = eea_tx_timeout, }; static struct eea_net *eea_netdev_alloc(struct eea_device *edev, u32 pairs) @@ -134,11 +464,48 @@ static struct eea_net *eea_netdev_alloc(struct eea_device *edev, u32 pairs) return enet; } +static void eea_update_ts_off(struct eea_device *edev, struct eea_net *enet) +{ + u64 ts; + + 
ts = eea_pci_device_ts(edev); + + enet->hw_ts_offset = ktime_get_real() - ts; +} + +static int eea_net_reprobe(struct eea_device *edev) +{ + struct eea_net *enet = edev->enet; + int err = 0; + + enet->edev = edev; + + if (!enet->adminq.ring) { + err = eea_create_adminq(enet, edev->rx_num + edev->tx_num); + if (err) + return err; + } + + eea_update_ts_off(edev, enet); + + if (edev->ha_reset_netdev_running) { + rtnl_lock(); + enet->link_err = 0; + err = eea_netdev_open(enet->netdev); + rtnl_unlock(); + } + + return err; +} + int eea_net_probe(struct eea_device *edev) { struct eea_net *enet; int err = -ENOMEM; + if (edev->ha_reset) + return eea_net_reprobe(edev); + enet = eea_netdev_alloc(edev, edev->rx_num); if (!enet) return -ENOMEM; @@ -159,6 +526,7 @@ int eea_net_probe(struct eea_device *edev) if (err) goto err_reset_dev; + eea_update_ts_off(edev, enet); netif_carrier_off(enet->netdev); netdev_dbg(enet->netdev, "eea probe success.\n"); @@ -182,12 +550,25 @@ void eea_net_remove(struct eea_device *edev) enet = edev->enet; netdev = enet->netdev; - unregister_netdev(netdev); - netdev_dbg(enet->netdev, "eea removed.\n"); + if (edev->ha_reset) { + edev->ha_reset_netdev_running = false; + if (netif_running(enet->netdev)) { + rtnl_lock(); + eea_netdev_stop(enet->netdev); + enet->link_err = EEA_LINK_ERR_HA_RESET_DEV; + enet->edev = NULL; + rtnl_unlock(); + edev->ha_reset_netdev_running = true; + } + } else { + unregister_netdev(netdev); + netdev_dbg(enet->netdev, "eea removed.\n"); + } eea_device_reset(edev); eea_destroy_adminq(enet); - free_netdev(netdev); + if (!edev->ha_reset) + free_netdev(netdev); } diff --git a/drivers/net/ethernet/alibaba/eea/eea_net.h b/drivers/net/ethernet/alibaba/eea/eea_net.h index b35d7483de634..b451f6765480c 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_net.h +++ b/drivers/net/ethernet/alibaba/eea/eea_net.h @@ -18,6 +18,13 @@ #define EEA_VER_MINOR 0 #define EEA_VER_SUB_MINOR 0 +struct eea_tx_meta; + +struct eea_reprobe { + struct eea_net 
*enet; + bool running_before_reprobe; +}; + struct eea_net_tx { struct eea_net *enet; @@ -104,6 +111,18 @@ struct eea_net_cfg { u8 tx_cq_desc_size; u32 split_hdr; + + struct hwtstamp_config ts_cfg; +}; + +struct eea_net_init_ctx { + struct eea_net_cfg cfg; + + struct eea_net_tx *tx; + struct eea_net_rx **rx; + + struct net_device *netdev; + struct eea_device *edev; }; enum { @@ -135,9 +154,38 @@ struct eea_net { u64 hw_ts_offset; }; +int eea_tx_resize(struct eea_net *enet, struct eea_net_tx *tx, u32 ring_num); + int eea_net_probe(struct eea_device *edev); void eea_net_remove(struct eea_device *edev); int eea_net_freeze(struct eea_device *edev); int eea_net_restore(struct eea_device *edev); +int eea_reset_hw_resources(struct eea_net *enet, struct eea_net_init_ctx *ctx); +void enet_init_ctx(struct eea_net *enet, struct eea_net_init_ctx *ctx); +int eea_queues_check_and_reset(struct eea_device *edev); + +/* rx apis */ +int eea_poll(struct napi_struct *napi, int budget); + +void enet_rx_stop(struct eea_net_rx *rx); +void enet_rx_start(struct eea_net_rx *rx); + +void eea_free_rx(struct eea_net_rx *rx); +struct eea_net_rx *eea_alloc_rx(struct eea_net_init_ctx *ctx, u32 idx); + +void eea_irq_free(struct eea_net_rx *rx); + +int enet_rxtx_irq_setup(struct eea_net *enet, u32 qid, u32 num); + +/* tx apis */ +int eea_poll_tx(struct eea_net_tx *tx, int budget); +void eea_poll_cleantx(struct eea_net_rx *rx); +netdev_tx_t eea_tx_xmit(struct sk_buff *skb, struct net_device *netdev); + +void eea_tx_timeout(struct net_device *netdev, u32 txqueue); + +void eea_free_tx(struct eea_net_tx *tx); +int eea_alloc_tx(struct eea_net_init_ctx *ctx, struct eea_net_tx *tx, u32 idx); + #endif diff --git a/drivers/net/ethernet/alibaba/eea/eea_pci.c b/drivers/net/ethernet/alibaba/eea/eea_pci.c index ce320c3953a88..06f733147f8d5 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_pci.c +++ b/drivers/net/ethernet/alibaba/eea/eea_pci.c @@ -13,6 +13,9 @@ #define EEA_PCI_DB_OFFSET 4096 +#define 
EEA_PCI_CAP_RESET_DEVICE 0xFA +#define EEA_PCI_CAP_RESET_FLAG BIT(1) + struct eea_pci_cfg { __le32 reserve0; __le32 reserve1; @@ -51,6 +54,7 @@ struct eea_pci_device { void __iomem *reg; void __iomem *db_base; + struct work_struct ha_handle_work; char ha_irq_name[32]; u8 reset_pos; }; @@ -67,6 +71,11 @@ struct eea_pci_device { #define cfg_read32(reg, item) ioread32(cfg_pointer(reg, item)) #define cfg_readq(reg, item) readq(cfg_pointer(reg, item)) +/* Due to circular references, we have to add forward declarations here. */ +static int __eea_pci_probe(struct pci_dev *pci_dev, + struct eea_pci_device *ep_dev); +static void __eea_pci_remove(struct pci_dev *pci_dev, bool flush_ha_work); + const char *eea_pci_name(struct eea_device *edev) { return pci_name(edev->ep_dev->pci_dev); @@ -248,6 +257,153 @@ void eea_pci_active_aq(struct eea_ring *ering) cfg_read32(ep_dev->reg, aq_db_off)); } +void eea_pci_free_irq(struct eea_ring *ering, void *data) +{ + struct eea_pci_device *ep_dev = ering->edev->ep_dev; + int irq; + + irq = pci_irq_vector(ep_dev->pci_dev, ering->msix_vec); + irq_update_affinity_hint(irq, NULL); + free_irq(irq, data); +} + +int eea_pci_request_irq(struct eea_ring *ering, + irqreturn_t (*callback)(int irq, void *data), + void *data) +{ + struct eea_pci_device *ep_dev = ering->edev->ep_dev; + int irq; + + snprintf(ering->irq_name, sizeof(ering->irq_name), "eea-q%d@%s", + ering->index / 2, pci_name(ep_dev->pci_dev)); + + irq = pci_irq_vector(ep_dev->pci_dev, ering->msix_vec); + + return request_irq(irq, callback, 0, ering->irq_name, data); +} + +static int eea_ha_handle_reset(struct eea_pci_device *ep_dev) +{ + struct eea_device *edev; + struct pci_dev *pci_dev; + u16 reset; + + if (!ep_dev->reset_pos) + return 0; + + edev = &ep_dev->edev; + + pci_read_config_word(ep_dev->pci_dev, ep_dev->reset_pos, &reset); + + /* clear bit */ + pci_write_config_word(ep_dev->pci_dev, ep_dev->reset_pos, 0xFFFF); + + if (reset & EEA_PCI_CAP_RESET_FLAG) { + 
dev_warn(&ep_dev->pci_dev->dev, "recv device reset request.\n"); + + pci_dev = ep_dev->pci_dev; + + /* The pci remove callback may hold this lock. If the + * pci remove callback is called, then we can ignore the + * ha interrupt. + */ + if (mutex_trylock(&edev->ha_lock)) { + edev->ha_reset = true; + + __eea_pci_remove(pci_dev, false); + + /* Do not drop a failed re-probe silently: the device was + * just removed and is not brought back in that case. + * NOTE(review): confirm a later eea_pci_remove() copes + * with a device left in that state. + */ + if (__eea_pci_probe(pci_dev, ep_dev)) + dev_err(&pci_dev->dev, + "ha device reset: re-probe failed.\n"); + + edev->ha_reset = false; + mutex_unlock(&edev->ha_lock); + } else { + dev_warn(&ep_dev->pci_dev->dev, + "ha device reset: trylock failed.\n"); + } + + return 1; + } + + return 0; +} + +/* ha handle code */ +static void eea_ha_handle_work(struct work_struct *work) +{ + struct eea_pci_device *ep_dev; + int done; + + ep_dev = container_of(work, struct eea_pci_device, ha_handle_work); + + /* Ha interrupt is triggered, so there may be some error; we may need to + * reset the device or reset some queues. + */ + dev_warn(&ep_dev->pci_dev->dev, "recv ha interrupt.\n"); + + done = eea_ha_handle_reset(ep_dev); + if (done) + return; + + eea_queues_check_and_reset(&ep_dev->edev); +} + +static irqreturn_t eea_pci_ha_handle(int irq, void *data) +{ + struct eea_device *edev = data; + + schedule_work(&edev->ep_dev->ha_handle_work); + + return IRQ_HANDLED; +} + +static void eea_pci_free_ha_irq(struct eea_device *edev) +{ + struct eea_pci_device *ep_dev = edev->ep_dev; + int irq = pci_irq_vector(ep_dev->pci_dev, 0); + + free_irq(irq, edev); +} + +static int eea_pci_ha_init(struct eea_device *edev, struct pci_dev *pci_dev) +{ + u8 pos, cfg_type_off, type, cfg_drv_off, cfg_dev_off; + struct eea_pci_device *ep_dev = edev->ep_dev; + int irq; + + cfg_type_off = offsetof(struct eea_pci_cap, cfg_type); + cfg_drv_off = offsetof(struct eea_pci_reset_reg, driver); + cfg_dev_off = offsetof(struct eea_pci_reset_reg, device); + + for (pos = pci_find_capability(pci_dev, PCI_CAP_ID_VNDR); + pos > 0; + pos = pci_find_next_capability(pci_dev, pos, PCI_CAP_ID_VNDR)) { + pci_read_config_byte(pci_dev, pos + cfg_type_off, &type); + + 
if (type == EEA_PCI_CAP_RESET_DEVICE) { + /* notify device, driver support this feature. */ + pci_write_config_word(pci_dev, pos + cfg_drv_off, + EEA_PCI_CAP_RESET_FLAG); + pci_write_config_word(pci_dev, pos + cfg_dev_off, + 0xFFFF); + + edev->ep_dev->reset_pos = pos + cfg_dev_off; + goto found; + } + } + + dev_warn(&edev->ep_dev->pci_dev->dev, "Not Found reset cap.\n"); + +found: + snprintf(ep_dev->ha_irq_name, sizeof(ep_dev->ha_irq_name), "eea-ha@%s", + pci_name(ep_dev->pci_dev)); + + irq = pci_irq_vector(ep_dev->pci_dev, 0); + + INIT_WORK(&ep_dev->ha_handle_work, eea_ha_handle_work); + + return request_irq(irq, eea_pci_ha_handle, 0, + ep_dev->ha_irq_name, edev); +} + u64 eea_pci_device_ts(struct eea_device *edev) { struct eea_pci_device *ep_dev = edev->ep_dev; @@ -282,10 +438,13 @@ static int eea_init_device(struct eea_device *edev) static int __eea_pci_probe(struct pci_dev *pci_dev, struct eea_pci_device *ep_dev) { + struct eea_device *edev; int err; pci_set_drvdata(pci_dev, ep_dev); + edev = &ep_dev->edev; + err = eea_pci_setup(pci_dev, ep_dev); if (err) return err; @@ -294,8 +453,15 @@ static int __eea_pci_probe(struct pci_dev *pci_dev, if (err) goto err_pci_rel; + err = eea_pci_ha_init(edev, pci_dev); + if (err) + goto err_net_rm; + return 0; +err_net_rm: + eea_net_remove(edev); + err_pci_rel: eea_pci_release_resource(ep_dev); return err; @@ -307,6 +473,11 @@ static void __eea_pci_remove(struct pci_dev *pci_dev, bool flush_ha_work) struct device *dev = get_device(&ep_dev->pci_dev->dev); struct eea_device *edev = &ep_dev->edev; + eea_pci_free_ha_irq(edev); + + if (flush_ha_work) + flush_work(&ep_dev->ha_handle_work); + eea_net_remove(edev); pci_disable_sriov(pci_dev); @@ -334,6 +505,8 @@ static int eea_pci_probe(struct pci_dev *pci_dev, ep_dev->pci_dev = pci_dev; + mutex_init(&edev->ha_lock); + err = __eea_pci_probe(pci_dev, ep_dev); if (err) kfree(ep_dev); @@ -344,8 +517,13 @@ static int eea_pci_probe(struct pci_dev *pci_dev, static void 
eea_pci_remove(struct pci_dev *pci_dev) { struct eea_pci_device *ep_dev = pci_get_drvdata(pci_dev); + struct eea_device *edev; + + edev = &ep_dev->edev; + mutex_lock(&edev->ha_lock); __eea_pci_remove(pci_dev, true); + mutex_unlock(&edev->ha_lock); kfree(ep_dev); } diff --git a/drivers/net/ethernet/alibaba/eea/eea_pci.h b/drivers/net/ethernet/alibaba/eea/eea_pci.h index d793128e556ca..cdddb465d9564 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_pci.h +++ b/drivers/net/ethernet/alibaba/eea/eea_pci.h @@ -10,6 +10,7 @@ #include +#include "eea_net.h" #include "eea_ring.h" struct eea_pci_cap { @@ -34,6 +35,12 @@ struct eea_device { u64 features; + bool ha_reset; + bool ha_reset_netdev_running; + + /* ha lock for the race between ha work and pci remove */ + struct mutex ha_lock; + u32 rx_num; u32 tx_num; u32 db_blk_size; @@ -47,7 +54,14 @@ int eea_device_reset(struct eea_device *dev); void eea_device_ready(struct eea_device *dev); void eea_pci_active_aq(struct eea_ring *ering); +int eea_pci_request_irq(struct eea_ring *ering, + irqreturn_t (*callback)(int irq, void *data), + void *data); +void eea_pci_free_irq(struct eea_ring *ering, void *data); + u64 eea_pci_device_ts(struct eea_device *edev); +int eea_pci_set_affinity(struct eea_ring *ering, + const struct cpumask *cpu_mask); void __iomem *eea_pci_db_addr(struct eea_device *edev, u32 off); #endif diff --git a/drivers/net/ethernet/alibaba/eea/eea_rx.c b/drivers/net/ethernet/alibaba/eea/eea_rx.c new file mode 100644 index 0000000000000..4a802cf87de02 --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_rx.c @@ -0,0 +1,764 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#include +#include + +#include "eea_adminq.h" +#include "eea_net.h" +#include "eea_ring.h" + +#define EEA_SETUP_F_NAPI BIT(0) +#define EEA_SETUP_F_IRQ BIT(1) +#define EEA_ENABLE_F_NAPI BIT(2) + +#define EEA_PAGE_FRAGS_NUM 1024 + +#define EEA_RX_BUF_ALIGN 128 + +struct eea_rx_ctx { + void *buf; + + u32 len; + u32 hdr_len; + + u16 flags; + bool more; + + u32 frame_sz; + + struct eea_rx_meta *meta; +}; + +static struct eea_rx_meta *eea_rx_meta_get(struct eea_net_rx *rx) +{ + struct eea_rx_meta *meta; + + if (!rx->free) + return NULL; + + meta = rx->free; + rx->free = meta->next; + + return meta; +} + +static void eea_rx_meta_put(struct eea_net_rx *rx, struct eea_rx_meta *meta) +{ + meta->next = rx->free; + rx->free = meta; +} + +static void eea_free_rx_buffer(struct eea_net_rx *rx, struct eea_rx_meta *meta) +{ + u32 drain_count; + + drain_count = EEA_PAGE_FRAGS_NUM - meta->frags; + + if (page_pool_unref_page(meta->page, drain_count) == 0) + page_pool_put_unrefed_page(rx->pp, meta->page, -1, true); + + meta->page = NULL; +} + +static void meta_align_offset(struct eea_net_rx *rx, struct eea_rx_meta *meta) +{ + int h, b; + + h = rx->headroom; + b = meta->offset + h; + + /* For better performance, we align the buffer address to + * EEA_RX_BUF_ALIGN, as required by the device design. 
+ */ + b = ALIGN(b, EEA_RX_BUF_ALIGN); + + meta->offset = b - h; +} + +static int eea_alloc_rx_buffer(struct eea_net_rx *rx, struct eea_rx_meta *meta) +{ + struct page *page; + + if (meta->page) + return 0; + + page = page_pool_dev_alloc_pages(rx->pp); + if (!page) + return -ENOMEM; + + page_pool_fragment_page(page, EEA_PAGE_FRAGS_NUM); + + meta->page = page; + meta->dma = page_pool_get_dma_addr(page); + meta->offset = 0; + meta->frags = 0; + + meta_align_offset(rx, meta); + + return 0; +} + +static void eea_consume_rx_buffer(struct eea_net_rx *rx, + struct eea_rx_meta *meta, + u32 consumed) +{ + int min; + + meta->offset += consumed; + ++meta->frags; + + min = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + min += rx->headroom; + min += ETH_DATA_LEN; + + meta_align_offset(rx, meta); + + if (min + meta->offset > PAGE_SIZE) + eea_free_rx_buffer(rx, meta); +} + +static void eea_free_rx_hdr(struct eea_net_rx *rx) +{ + struct eea_net *enet = rx->enet; + struct eea_rx_meta *meta; + int i; + + for (i = 0; i < enet->cfg.rx_ring_depth; ++i) { + meta = &rx->meta[i]; + meta->hdr_addr = NULL; + + if (!meta->hdr_page) + continue; + + dma_unmap_page(enet->edev->dma_dev, meta->hdr_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + put_page(meta->hdr_page); + + meta->hdr_page = NULL; + } +} + +static int eea_alloc_rx_hdr(struct eea_net_init_ctx *ctx, struct eea_net_rx *rx) +{ + struct page *hdr_page = NULL; + struct eea_rx_meta *meta; + u32 offset = 0, hdrsize; + struct device *dmadev; + dma_addr_t dma; + int i; + + dmadev = ctx->edev->dma_dev; + hdrsize = ctx->cfg.split_hdr; + + for (i = 0; i < ctx->cfg.rx_ring_depth; ++i) { + meta = &rx->meta[i]; + + if (!hdr_page || offset + hdrsize > PAGE_SIZE) { + hdr_page = dev_alloc_page(); + if (!hdr_page) + return -ENOMEM; + + dma = dma_map_page(dmadev, hdr_page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(dmadev, dma))) { + put_page(hdr_page); + return -ENOMEM; + } + + offset = 0; + meta->hdr_page = hdr_page; + meta->dma = 
dma; + } + + meta->hdr_dma = dma + offset; + meta->hdr_addr = page_address(hdr_page) + offset; + offset += hdrsize; + } + + return 0; +} + +static void eea_rx_meta_dma_sync_for_cpu(struct eea_net_rx *rx, + struct eea_rx_meta *meta, u32 len) +{ + dma_sync_single_for_cpu(rx->enet->edev->dma_dev, + meta->dma + meta->offset + meta->headroom, + len, DMA_FROM_DEVICE); +} + +static int eea_harden_check_overflow(struct eea_rx_ctx *ctx, + struct eea_net *enet) +{ + if (unlikely(ctx->len > ctx->meta->truesize - ctx->meta->room)) { + pr_debug("%s: rx error: len %u exceeds truesize %u\n", + enet->netdev->name, ctx->len, + ctx->meta->truesize - ctx->meta->room); + return -EINVAL; + } + + return 0; +} + +static int eea_harden_check_size(struct eea_rx_ctx *ctx, struct eea_net *enet) +{ + int err; + + err = eea_harden_check_overflow(ctx, enet); + if (err) + return err; + + if (unlikely(ctx->hdr_len + ctx->len < ETH_HLEN)) { + pr_debug("%s: short packet %u\n", enet->netdev->name, ctx->len); + return -EINVAL; + } + + return 0; +} + +static struct sk_buff *eea_build_skb(void *buf, u32 buflen, u32 headroom, + u32 len) +{ + struct sk_buff *skb; + + skb = build_skb(buf, buflen); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, headroom); + skb_put(skb, len); + + return skb; +} + +static struct sk_buff *eea_rx_build_split_hdr_skb(struct eea_net_rx *rx, + struct eea_rx_ctx *ctx) +{ + struct eea_rx_meta *meta = ctx->meta; + struct sk_buff *skb; + u32 truesize; + + dma_sync_single_for_cpu(rx->enet->edev->dma_dev, meta->hdr_dma, + ctx->hdr_len, DMA_FROM_DEVICE); + + skb = napi_alloc_skb(&rx->napi, ctx->hdr_len); + if (unlikely(!skb)) + return NULL; + + truesize = meta->headroom + ctx->len; + + skb_put_data(skb, ctx->meta->hdr_addr, ctx->hdr_len); + + if (ctx->len) { + skb_add_rx_frag(skb, 0, meta->page, + meta->offset + meta->headroom, + ctx->len, truesize); + + eea_consume_rx_buffer(rx, meta, truesize); + } + + skb_mark_for_recycle(skb); + + return skb; +} + +static struct sk_buff 
*eea_rx_build_skb(struct eea_net_rx *rx, + struct eea_rx_ctx *ctx) +{ + struct eea_rx_meta *meta = ctx->meta; + u32 len, shinfo_size, truesize; + struct sk_buff *skb; + struct page *page; + void *buf, *pkt; + + page = meta->page; + if (!page) + return NULL; + + shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + buf = page_address(page) + meta->offset; + pkt = buf + meta->headroom; + len = ctx->len; + truesize = meta->headroom + ctx->len + shinfo_size; + + skb = eea_build_skb(buf, truesize, pkt - buf, len); + if (unlikely(!skb)) + return NULL; + + eea_consume_rx_buffer(rx, meta, truesize); + skb_mark_for_recycle(skb); + + return skb; +} + +static int eea_skb_append_buf(struct eea_net_rx *rx, struct eea_rx_ctx *ctx) +{ + struct sk_buff *curr_skb = rx->pkt.curr_skb; + struct sk_buff *head_skb = rx->pkt.head_skb; + int num_skb_frags; + int offset; + + if (!curr_skb) + curr_skb = head_skb; + + num_skb_frags = skb_shinfo(curr_skb)->nr_frags; + if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { + struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); + + if (unlikely(!nskb)) + return -ENOMEM; + + if (curr_skb == head_skb) + skb_shinfo(curr_skb)->frag_list = nskb; + else + curr_skb->next = nskb; + + curr_skb = nskb; + head_skb->truesize += nskb->truesize; + num_skb_frags = 0; + + rx->pkt.curr_skb = curr_skb; + } + + if (curr_skb != head_skb) { + head_skb->data_len += ctx->len; + head_skb->len += ctx->len; + head_skb->truesize += ctx->meta->truesize; + } + + offset = ctx->meta->offset + ctx->meta->headroom; + + skb_add_rx_frag(curr_skb, num_skb_frags, ctx->meta->page, + offset, ctx->len, ctx->meta->truesize); + + eea_consume_rx_buffer(rx, ctx->meta, ctx->meta->headroom + ctx->len); + + return 0; +} + +static int process_remain_buf(struct eea_net_rx *rx, struct eea_rx_ctx *ctx) +{ + struct eea_net *enet = rx->enet; + + if (eea_harden_check_overflow(ctx, enet)) + goto err; + + if (eea_skb_append_buf(rx, ctx)) + goto err; + + return 0; + +err: + 
dev_kfree_skb(rx->pkt.head_skb); + rx->pkt.do_drop = true; + rx->pkt.head_skb = NULL; + return 0; +} + +static int process_first_buf(struct eea_net_rx *rx, struct eea_rx_ctx *ctx) +{ + struct eea_net *enet = rx->enet; + struct sk_buff *skb = NULL; + + if (eea_harden_check_size(ctx, enet)) + goto err; + + rx->pkt.data_valid = ctx->flags & EEA_DESC_F_DATA_VALID; + + if (ctx->hdr_len) + skb = eea_rx_build_split_hdr_skb(rx, ctx); + else + skb = eea_rx_build_skb(rx, ctx); + + if (unlikely(!skb)) + goto err; + + rx->pkt.head_skb = skb; + + return 0; + +err: + rx->pkt.do_drop = true; + return 0; +} + +static void eea_submit_skb(struct eea_net_rx *rx, struct sk_buff *skb, + struct eea_rx_cdesc *desc) +{ + struct eea_net *enet = rx->enet; + + if (rx->pkt.data_valid) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (enet->cfg.ts_cfg.rx_filter == HWTSTAMP_FILTER_ALL) + skb_hwtstamps(skb)->hwtstamp = EEA_DESC_TS(desc) + + enet->hw_ts_offset; + + skb_record_rx_queue(skb, rx->index); + skb->protocol = eth_type_trans(skb, enet->netdev); + + napi_gro_receive(&rx->napi, skb); +} + +static void eea_rx_desc_to_ctx(struct eea_net_rx *rx, + struct eea_rx_ctx *ctx, + struct eea_rx_cdesc *desc) +{ + ctx->meta = &rx->meta[le16_to_cpu(desc->id)]; + ctx->len = le16_to_cpu(desc->len); + ctx->flags = le16_to_cpu(desc->flags); + + ctx->hdr_len = 0; + if (ctx->flags & EEA_DESC_F_SPLIT_HDR) + ctx->hdr_len = le16_to_cpu(desc->len_ex) & + EEA_RX_CDESC_HDR_LEN_MASK; + + ctx->more = ctx->flags & EEA_RING_DESC_F_MORE; +} + +static int eea_cleanrx(struct eea_net_rx *rx, int budget, + struct eea_rx_ctx *ctx) +{ + struct eea_rx_cdesc *desc; + struct eea_rx_meta *meta; + int packets; + + for (packets = 0; packets < budget; ) { + desc = ering_cq_get_desc(rx->ering); + if (!desc) + break; + + eea_rx_desc_to_ctx(rx, ctx, desc); + + meta = ctx->meta; + ctx->buf = page_address(meta->page) + meta->offset + + meta->headroom; + + if (unlikely(rx->pkt.do_drop)) + goto skip; + + eea_rx_meta_dma_sync_for_cpu(rx, 
meta, ctx->len); + + if (!rx->pkt.idx) + process_first_buf(rx, ctx); + else + process_remain_buf(rx, ctx); + + ++rx->pkt.idx; + + if (!ctx->more) { + if (likely(rx->pkt.head_skb)) + eea_submit_skb(rx, rx->pkt.head_skb, desc); + + ++packets; + } + +skip: + eea_rx_meta_put(rx, meta); + ering_cq_ack_desc(rx->ering, 1); + + if (!ctx->more) + memset(&rx->pkt, 0, sizeof(rx->pkt)); + } + + return packets; +} + +static bool eea_rx_post(struct eea_net *enet, struct eea_net_rx *rx) +{ + u32 tailroom, headroom, room, len; + struct eea_rx_meta *meta; + struct eea_rx_desc *desc; + int err = 0, num = 0; + dma_addr_t addr; + + tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + headroom = rx->headroom; + room = headroom + tailroom; + + while (true) { + meta = eea_rx_meta_get(rx); + if (!meta) + break; + + err = eea_alloc_rx_buffer(rx, meta); + if (err) { + eea_rx_meta_put(rx, meta); + break; + } + + len = PAGE_SIZE - meta->offset - room; + addr = meta->dma + meta->offset + headroom; + + desc = ering_sq_alloc_desc(rx->ering, meta->id, true, 0); + desc->addr = cpu_to_le64(addr); + desc->len = cpu_to_le16(len); + + if (meta->hdr_addr) + desc->hdr_addr = cpu_to_le64(meta->hdr_dma); + + ering_sq_commit_desc(rx->ering); + + meta->truesize = len + room; + meta->headroom = headroom; + meta->tailroom = tailroom; + meta->len = len; + ++num; + } + + if (num) + ering_kick(rx->ering); + + /* true means busy, napi should be called again. 
*/ + return !!err; +} + +int eea_poll(struct napi_struct *napi, int budget) +{ + struct eea_net_rx *rx = container_of(napi, struct eea_net_rx, napi); + struct eea_net_tx *tx = &rx->enet->tx[rx->index]; + struct eea_net *enet = rx->enet; + struct eea_rx_ctx ctx = {}; + bool busy = false; + u32 received; + + eea_poll_tx(tx, budget); + + received = eea_cleanrx(rx, budget, &ctx); + + if (rx->ering->num_free > budget) + busy |= eea_rx_post(enet, rx); + + busy |= received >= budget; + + if (!busy) { + if (napi_complete_done(napi, received)) + ering_irq_active(rx->ering, tx->ering); + } + + if (busy) + return budget; + + return budget - 1; +} + +static void eea_free_rx_buffers(struct eea_net_rx *rx) +{ + struct eea_rx_meta *meta; + u32 i; + + for (i = 0; i < rx->enet->cfg.rx_ring_depth; ++i) { + meta = &rx->meta[i]; + if (!meta->page) + continue; + + eea_free_rx_buffer(rx, meta); + } +} + +static struct page_pool *eea_create_pp(struct eea_net_rx *rx, + struct eea_net_init_ctx *ctx, u32 idx) +{ + struct page_pool_params pp_params = {0}; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = ctx->cfg.rx_ring_depth; + pp_params.nid = dev_to_node(ctx->edev->dma_dev); + pp_params.dev = ctx->edev->dma_dev; + pp_params.napi = &rx->napi; + pp_params.netdev = ctx->netdev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pp_params.max_len = PAGE_SIZE; + + return page_pool_create(&pp_params); +} + +static void eea_destroy_page_pool(struct eea_net_rx *rx) +{ + if (rx->pp) + page_pool_destroy(rx->pp); +} + +static irqreturn_t irq_handler(int irq, void *data) +{ + struct eea_net_rx *rx = data; + + rx->irq_n++; + + napi_schedule_irqoff(&rx->napi); + + return IRQ_HANDLED; +} + +void enet_rx_stop(struct eea_net_rx *rx) +{ + if (rx->flags & EEA_ENABLE_F_NAPI) { + rx->flags &= ~EEA_ENABLE_F_NAPI; + napi_disable(&rx->napi); + } +} + +void enet_rx_start(struct eea_net_rx *rx) +{ + napi_enable(&rx->napi); + rx->flags |= EEA_ENABLE_F_NAPI; + + 
local_bh_disable(); + napi_schedule(&rx->napi); + local_bh_enable(); +} + +static int enet_irq_setup_for_q(struct eea_net_rx *rx) +{ + int err; + + ering_irq_unactive(rx->ering); + + err = eea_pci_request_irq(rx->ering, irq_handler, rx); + if (err) + return err; + + rx->flags |= EEA_SETUP_F_IRQ; + + return 0; +} + +void eea_irq_free(struct eea_net_rx *rx) +{ + if (rx->flags & EEA_SETUP_F_IRQ) { + eea_pci_free_irq(rx->ering, rx); + rx->flags &= ~EEA_SETUP_F_IRQ; + } +} + +int enet_rxtx_irq_setup(struct eea_net *enet, u32 qid, u32 num) +{ + struct eea_net_rx *rx; + int err, i; + + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + rx = enet->rx[i]; + + err = enet_irq_setup_for_q(rx); + if (err) + goto err_free_irq; + } + + return 0; + +err_free_irq: + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + rx = enet->rx[i]; + + eea_irq_free(rx); + } + return err; +} + +void eea_free_rx(struct eea_net_rx *rx) +{ + if (!rx) + return; + + if (rx->ering) { + ering_free(rx->ering); + rx->ering = NULL; + } + + if (rx->meta) { + eea_free_rx_buffers(rx); + eea_free_rx_hdr(rx); + kvfree(rx->meta); + rx->meta = NULL; + } + + if (rx->pp) { + eea_destroy_page_pool(rx); + rx->pp = NULL; + } + + if (rx->flags & EEA_SETUP_F_NAPI) { + rx->flags &= ~EEA_SETUP_F_NAPI; + netif_napi_del(&rx->napi); + } + + kfree(rx); +} + +static void eea_rx_meta_init(struct eea_net_rx *rx, u32 num) +{ + struct eea_rx_meta *meta; + int i; + + rx->free = NULL; + + for (i = 0; i < num; ++i) { + meta = &rx->meta[i]; + meta->id = i; + meta->next = rx->free; + rx->free = meta; + } +} + +struct eea_net_rx *eea_alloc_rx(struct eea_net_init_ctx *ctx, u32 idx) +{ + struct eea_ring *ering; + struct eea_net_rx *rx; + int err; + + rx = kzalloc(sizeof(*rx), GFP_KERNEL); + if (!rx) + return rx; + + rx->index = idx; + sprintf(rx->name, "rx.%u", idx); + + /* ering */ + ering = ering_alloc(idx * 2, ctx->cfg.rx_ring_depth, ctx->edev, + ctx->cfg.rx_sq_desc_size, + ctx->cfg.rx_cq_desc_size, + rx->name); + if (!ering) + goto 
err_free_rx; + + rx->ering = ering; + + rx->dma_dev = ctx->edev->dma_dev; + + /* meta */ + rx->meta = kvcalloc(ctx->cfg.rx_ring_depth, + sizeof(*rx->meta), GFP_KERNEL); + if (!rx->meta) + goto err_free_rx; + + eea_rx_meta_init(rx, ctx->cfg.rx_ring_depth); + + if (ctx->cfg.split_hdr) { + err = eea_alloc_rx_hdr(ctx, rx); + if (err) + goto err_free_rx; + } + + rx->pp = eea_create_pp(rx, ctx, idx); + if (IS_ERR(rx->pp)) { + err = PTR_ERR(rx->pp); + rx->pp = NULL; + goto err_free_rx; + } + + netif_napi_add(ctx->netdev, &rx->napi, eea_poll); + rx->flags |= EEA_SETUP_F_NAPI; + + return rx; + +err_free_rx: + eea_free_rx(rx); + return NULL; +} diff --git a/drivers/net/ethernet/alibaba/eea/eea_tx.c b/drivers/net/ethernet/alibaba/eea/eea_tx.c new file mode 100644 index 0000000000000..a9c4cd0d75ffb --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_tx.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#include + +#include "eea_net.h" +#include "eea_pci.h" +#include "eea_ring.h" + +struct eea_sq_free_stats { + u64 packets; + u64 bytes; +}; + +struct eea_tx_meta { + struct eea_tx_meta *next; + + u32 id; + + union { + struct sk_buff *skb; + void *data; + }; + + u32 num; + + dma_addr_t dma_addr; + struct eea_tx_desc *desc; + u16 dma_len; +}; + +static struct eea_tx_meta *eea_tx_meta_get(struct eea_net_tx *tx) +{ + struct eea_tx_meta *meta; + + if (!tx->free) + return NULL; + + meta = tx->free; + tx->free = meta->next; + + return meta; +} + +static void eea_tx_meta_put_and_unmap(struct eea_net_tx *tx, + struct eea_tx_meta *meta) +{ + struct eea_tx_meta *head; + + head = meta; + + while (true) { + dma_unmap_single(tx->dma_dev, meta->dma_addr, + meta->dma_len, DMA_TO_DEVICE); + + meta->data = NULL; + + if (meta->next) { + meta = meta->next; + continue; + } + + break; + } + + meta->next = tx->free; + tx->free = head; +} + +static void eea_meta_free_xmit(struct eea_net_tx *tx, + struct eea_tx_meta *meta, + bool in_napi, + struct eea_tx_cdesc *desc, + struct eea_sq_free_stats *stats) +{ + struct sk_buff *skb = meta->skb; + + if (!skb) { + netdev_err(tx->enet->netdev, + "tx meta->skb is null. 
id %d num: %d\n", + meta->id, meta->num); + return; + } + + if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && desc)) { + struct skb_shared_hwtstamps ts = {}; + + ts.hwtstamp = EEA_DESC_TS(desc) + tx->enet->hw_ts_offset; + skb_tstamp_tx(skb, &ts); + } + + stats->bytes += meta->skb->len; + napi_consume_skb(meta->skb, in_napi); +} + +static u32 eea_clean_tx(struct eea_net_tx *tx) +{ + struct eea_sq_free_stats stats = {0}; + struct eea_tx_cdesc *desc; + struct eea_tx_meta *meta; + + while ((desc = ering_cq_get_desc(tx->ering))) { + ++stats.packets; + + meta = &tx->meta[le16_to_cpu(desc->id)]; + + eea_meta_free_xmit(tx, meta, true, desc, &stats); + + ering_cq_ack_desc(tx->ering, meta->num); + eea_tx_meta_put_and_unmap(tx, meta); + } + + return stats.packets; +} + +int eea_poll_tx(struct eea_net_tx *tx, int budget) +{ + struct eea_net *enet = tx->enet; + u32 index = tx - enet->tx; + struct netdev_queue *txq; + u32 cleaned; + + txq = netdev_get_tx_queue(enet->netdev, index); + + __netif_tx_lock(txq, raw_smp_processor_id()); + + cleaned = eea_clean_tx(tx); + + if (netif_tx_queue_stopped(txq) && cleaned > 0) + netif_tx_wake_queue(txq); + + __netif_tx_unlock(txq); + + return 0; +} + +static int eea_fill_desc_from_skb(const struct sk_buff *skb, + struct eea_ring *ering, + struct eea_tx_desc *desc) +{ + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + desc->gso_size = cpu_to_le16(sinfo->gso_size); + if (sinfo->gso_type & SKB_GSO_TCPV4) + desc->gso_type = EEA_TX_GSO_TCPV4; + + else if (sinfo->gso_type & SKB_GSO_TCPV6) + desc->gso_type = EEA_TX_GSO_TCPV6; + + else if (sinfo->gso_type & SKB_GSO_UDP_L4) + desc->gso_type = EEA_TX_GSO_UDP_L4; + + else + return -EINVAL; + + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + desc->gso_type |= EEA_TX_GSO_ECN; + } else { + desc->gso_type = EEA_TX_GSO_NONE; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); + desc->csum_offset = 
cpu_to_le16(skb->csum_offset); + } + + return 0; +} + +static struct eea_tx_meta *eea_tx_desc_fill(struct eea_net_tx *tx, + dma_addr_t addr, u32 len, + bool is_last, void *data, u16 flags) +{ + struct eea_tx_meta *meta; + struct eea_tx_desc *desc; + + meta = eea_tx_meta_get(tx); + + desc = ering_sq_alloc_desc(tx->ering, meta->id, is_last, flags); + desc->addr = cpu_to_le64(addr); + desc->len = cpu_to_le16(len); + + meta->next = NULL; + meta->dma_len = len; + meta->dma_addr = addr; + meta->data = data; + meta->num = 1; + meta->desc = desc; + + return meta; +} + +static int eea_tx_add_skb_frag(struct eea_net_tx *tx, + struct eea_tx_meta *head_meta, + const skb_frag_t *frag, bool is_last) +{ + u32 len = skb_frag_size(frag); + struct eea_tx_meta *meta; + dma_addr_t addr; + + addr = skb_frag_dma_map(tx->dma_dev, frag, 0, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx->dma_dev, addr))) + return -ENOMEM; + + meta = eea_tx_desc_fill(tx, addr, len, is_last, NULL, 0); + + meta->next = head_meta->next; + head_meta->next = meta; + + return 0; +} + +static int eea_tx_post_skb(struct eea_net_tx *tx, struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + u32 hlen = skb_headlen(skb); + struct eea_tx_meta *meta; + dma_addr_t addr; + int i, err; + u16 flags; + + addr = dma_map_single(tx->dma_dev, skb->data, hlen, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx->dma_dev, addr))) + return -ENOMEM; + + flags = skb->ip_summed == CHECKSUM_PARTIAL ? 
EEA_DESC_F_DO_CSUM : 0; + + meta = eea_tx_desc_fill(tx, addr, hlen, !shinfo->nr_frags, skb, flags); + + if (eea_fill_desc_from_skb(skb, tx->ering, meta->desc)) + goto err_cancel; + + for (i = 0; i < shinfo->nr_frags; i++) { + const skb_frag_t *frag = &shinfo->frags[i]; + bool is_last = i == (shinfo->nr_frags - 1); + + err = eea_tx_add_skb_frag(tx, meta, frag, is_last); + if (err) + goto err_cancel; + } + + meta->num = shinfo->nr_frags + 1; + ering_sq_commit_desc(tx->ering); + + return 0; + +err_cancel: + ering_sq_cancel(tx->ering); + eea_tx_meta_put_and_unmap(tx, meta); + return -ENOMEM; +} + +static void eea_tx_kick(struct eea_net_tx *tx) +{ + ering_kick(tx->ering); +} + +netdev_tx_t eea_tx_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + struct eea_net *enet = netdev_priv(netdev); + int qnum = skb_get_queue_mapping(skb); + struct eea_net_tx *tx = &enet->tx[qnum]; + struct netdev_queue *txq; + int err, n; + + txq = netdev_get_tx_queue(netdev, qnum); + + n = shinfo->nr_frags + 1; + + if (!netif_txq_maybe_stop(txq, tx->ering->num_free, n, n)) { + /* maybe the previous skbs was xmitted without kick. 
*/ + eea_tx_kick(tx); + return NETDEV_TX_BUSY; + } + + skb_tx_timestamp(skb); + + err = eea_tx_post_skb(tx, skb); + if (unlikely(err)) + dev_kfree_skb_any(skb); + + if (!netdev_xmit_more() || netif_xmit_stopped(txq)) + eea_tx_kick(tx); + + return NETDEV_TX_OK; +} + +static void eea_free_meta(struct eea_net_tx *tx) +{ + struct eea_sq_free_stats stats; + struct eea_tx_meta *meta; + int i; + + while ((meta = eea_tx_meta_get(tx))) + meta->skb = NULL; + + for (i = 0; i < tx->enet->cfg.tx_ring_num; i++) { + meta = &tx->meta[i]; + + if (!meta->skb) + continue; + + eea_meta_free_xmit(tx, meta, false, NULL, &stats); + + meta->skb = NULL; + } + + kvfree(tx->meta); + tx->meta = NULL; +} + +void eea_tx_timeout(struct net_device *netdev, unsigned int txqueue) +{ + struct netdev_queue *txq = netdev_get_tx_queue(netdev, txqueue); + struct eea_net *priv = netdev_priv(netdev); + struct eea_net_tx *tx = &priv->tx[txqueue]; + + netdev_err(netdev, "TX timeout on queue: %u, tx: %s, ering: 0x%x, %u usecs ago\n", + txqueue, tx->name, tx->ering->index, + jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); +} + +void eea_free_tx(struct eea_net_tx *tx) +{ + if (!tx) + return; + + if (tx->ering) { + ering_free(tx->ering); + tx->ering = NULL; + } + + if (tx->meta) + eea_free_meta(tx); +} + +int eea_alloc_tx(struct eea_net_init_ctx *ctx, struct eea_net_tx *tx, u32 idx) +{ + struct eea_tx_meta *meta; + struct eea_ring *ering; + u32 i; + + sprintf(tx->name, "tx.%u", idx); + + ering = ering_alloc(idx * 2 + 1, ctx->cfg.tx_ring_depth, ctx->edev, + ctx->cfg.tx_sq_desc_size, + ctx->cfg.tx_cq_desc_size, + tx->name); + if (!ering) + goto err_free_tx; + + tx->ering = ering; + tx->index = idx; + tx->dma_dev = ctx->edev->dma_dev; + + /* meta */ + tx->meta = kvcalloc(ctx->cfg.tx_ring_depth, + sizeof(*tx->meta), GFP_KERNEL); + if (!tx->meta) + goto err_free_tx; + + for (i = 0; i < ctx->cfg.tx_ring_depth; ++i) { + meta = &tx->meta[i]; + meta->id = i; + meta->next = tx->free; + tx->free = meta; + } + + 
return 0; + +err_free_tx: + eea_free_tx(tx); + return -ENOMEM; +} From cb8149e5fbb8da9f58ebce91a40b1ee1865a33a9 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 10 Nov 2025 19:46:48 +0800 Subject: [PATCH 774/867] eea: introduce ethtool support Add basic driver framework for the Alibaba Elastic Ethernet Adapter(EEA). This commit introduces ethtool support. Reviewed-by: Andrew Lunn Reviewed-by: Dust Li Reviewed-by: Philo Lu Signed-off-by: Wen Gu Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- drivers/net/ethernet/alibaba/eea/Makefile | 1 + .../net/ethernet/alibaba/eea/eea_ethtool.c | 276 ++++++++++++++++++ .../net/ethernet/alibaba/eea/eea_ethtool.h | 50 ++++ drivers/net/ethernet/alibaba/eea/eea_net.c | 2 + drivers/net/ethernet/alibaba/eea/eea_net.h | 5 + drivers/net/ethernet/alibaba/eea/eea_rx.c | 29 +- drivers/net/ethernet/alibaba/eea/eea_tx.c | 24 +- 7 files changed, 383 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/alibaba/eea/eea_ethtool.c create mode 100644 drivers/net/ethernet/alibaba/eea/eea_ethtool.h diff --git a/drivers/net/ethernet/alibaba/eea/Makefile b/drivers/net/ethernet/alibaba/eea/Makefile index fa34a005fa01a..8f8fbb8d2d9a2 100644 --- a/drivers/net/ethernet/alibaba/eea/Makefile +++ b/drivers/net/ethernet/alibaba/eea/Makefile @@ -4,5 +4,6 @@ eea-y := eea_ring.o \ eea_net.o \ eea_pci.o \ eea_adminq.o \ + eea_ethtool.o \ eea_tx.o \ eea_rx.o diff --git a/drivers/net/ethernet/alibaba/eea/eea_ethtool.c b/drivers/net/ethernet/alibaba/eea/eea_ethtool.c new file mode 100644 index 0000000000000..16621c1bec2de --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_ethtool.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. 
+ */ + +#include +#include + +#include "eea_adminq.h" + +struct eea_stat_desc { + char desc[ETH_GSTRING_LEN]; + size_t offset; +}; + +#define EEA_TX_STAT(m) {#m, offsetof(struct eea_tx_stats, m)} +#define EEA_RX_STAT(m) {#m, offsetof(struct eea_rx_stats, m)} + +static const struct eea_stat_desc eea_rx_stats_desc[] = { + EEA_RX_STAT(descs), + EEA_RX_STAT(kicks), +}; + +static const struct eea_stat_desc eea_tx_stats_desc[] = { + EEA_TX_STAT(descs), + EEA_TX_STAT(kicks), +}; + +#define EEA_TX_STATS_LEN ARRAY_SIZE(eea_tx_stats_desc) +#define EEA_RX_STATS_LEN ARRAY_SIZE(eea_rx_stats_desc) + +static void eea_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct eea_net *enet = netdev_priv(netdev); + struct eea_device *edev = enet->edev; + + strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strscpy(info->bus_info, eea_pci_name(edev), sizeof(info->bus_info)); +} + +static void eea_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct eea_net *enet = netdev_priv(netdev); + + ring->rx_max_pending = enet->cfg_hw.rx_ring_depth; + ring->tx_max_pending = enet->cfg_hw.tx_ring_depth; + ring->rx_pending = enet->cfg.rx_ring_depth; + ring->tx_pending = enet->cfg.tx_ring_depth; + + kernel_ring->tcp_data_split = enet->cfg.split_hdr ? 
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static int eea_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct eea_net *enet = netdev_priv(netdev); + struct eea_net_init_ctx ctx; + bool need_update = false; + struct eea_net_cfg *cfg; + bool sh; + + enet_init_ctx(enet, &ctx); + + cfg = &ctx.cfg; + + if (ring->rx_pending != cfg->rx_ring_depth) + need_update = true; + + if (ring->tx_pending != cfg->tx_ring_depth) + need_update = true; + + sh = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED; + if (sh != !!(cfg->split_hdr)) + need_update = true; + + if (!need_update) + return 0; + + cfg->rx_ring_depth = ring->rx_pending; + cfg->tx_ring_depth = ring->tx_pending; + + cfg->split_hdr = sh ? enet->cfg_hw.split_hdr : 0; + + return eea_reset_hw_resources(enet, &ctx); +} + +static int eea_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct eea_net *enet = netdev_priv(netdev); + u16 queue_pairs = channels->combined_count; + struct eea_net_init_ctx ctx; + struct eea_net_cfg *cfg; + + enet_init_ctx(enet, &ctx); + + cfg = &ctx.cfg; + + cfg->rx_ring_num = queue_pairs; + cfg->tx_ring_num = queue_pairs; + + return eea_reset_hw_resources(enet, &ctx); +} + +static void eea_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct eea_net *enet = netdev_priv(netdev); + + channels->combined_count = enet->cfg.rx_ring_num; + channels->max_combined = enet->cfg_hw.rx_ring_num; +} + +static void eea_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct eea_net *enet = netdev_priv(netdev); + u8 *p = data; + u32 i, j; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + for (j = 0; j < EEA_RX_STATS_LEN; j++) + ethtool_sprintf(&p, "rx%u_%s", i, + eea_rx_stats_desc[j].desc); + } + + for (i = 0; i < 
enet->cfg.tx_ring_num; i++) { + for (j = 0; j < EEA_TX_STATS_LEN; j++) + ethtool_sprintf(&p, "tx%u_%s", i, + eea_tx_stats_desc[j].desc); + } +} + +static int eea_get_sset_count(struct net_device *netdev, int sset) +{ + struct eea_net *enet = netdev_priv(netdev); + + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return enet->cfg.rx_ring_num * EEA_RX_STATS_LEN + + enet->cfg.tx_ring_num * EEA_TX_STATS_LEN; +} + +static void eea_stats_fill_for_q(struct u64_stats_sync *syncp, u32 num, + const struct eea_stat_desc *desc, + u64 *data, u32 idx) +{ + void *stats_base = syncp; + u32 start, i; + + do { + start = u64_stats_fetch_begin(syncp); + for (i = 0; i < num; i++) + data[idx + i] = + u64_stats_read(stats_base + desc[i].offset); + + } while (u64_stats_fetch_retry(syncp, start)); +} + +static void eea_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct eea_net *enet = netdev_priv(netdev); + u32 i, idx = 0; + + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + struct eea_net_rx *rx = enet->rx[i]; + + eea_stats_fill_for_q(&rx->stats.syncp, EEA_RX_STATS_LEN, + eea_rx_stats_desc, data, idx); + + idx += EEA_RX_STATS_LEN; + } + + for (i = 0; i < enet->cfg.tx_ring_num; i++) { + struct eea_net_tx *tx = &enet->tx[i]; + + eea_stats_fill_for_q(&tx->stats.syncp, EEA_TX_STATS_LEN, + eea_tx_stats_desc, data, idx); + + idx += EEA_TX_STATS_LEN; + } +} + +void eea_update_rx_stats(struct eea_rx_stats *rx_stats, + struct eea_rx_ctx_stats *stats) +{ + u64_stats_update_begin(&rx_stats->syncp); + u64_stats_add(&rx_stats->descs, stats->descs); + u64_stats_add(&rx_stats->packets, stats->packets); + u64_stats_add(&rx_stats->bytes, stats->bytes); + u64_stats_add(&rx_stats->drops, stats->drops); + u64_stats_add(&rx_stats->split_hdr_bytes, stats->split_hdr_bytes); + u64_stats_add(&rx_stats->split_hdr_packets, stats->split_hdr_packets); + u64_stats_add(&rx_stats->length_errors, stats->length_errors); + u64_stats_update_end(&rx_stats->syncp); +} + 
+void eea_stats(struct net_device *netdev, struct rtnl_link_stats64 *tot) +{ + struct eea_net *enet = netdev_priv(netdev); + u64 packets, bytes; + u32 start; + int i; + + if (enet->rx) { + for (i = 0; i < enet->cfg.rx_ring_num; i++) { + struct eea_net_rx *rx = enet->rx[i]; + + do { + start = u64_stats_fetch_begin(&rx->stats.syncp); + packets = u64_stats_read(&rx->stats.packets); + bytes = u64_stats_read(&rx->stats.bytes); + } while (u64_stats_fetch_retry(&rx->stats.syncp, + start)); + + tot->rx_packets += packets; + tot->rx_bytes += bytes; + } + } + + if (enet->tx) { + for (i = 0; i < enet->cfg.tx_ring_num; i++) { + struct eea_net_tx *tx = &enet->tx[i]; + + do { + start = u64_stats_fetch_begin(&tx->stats.syncp); + packets = u64_stats_read(&tx->stats.packets); + bytes = u64_stats_read(&tx->stats.bytes); + } while (u64_stats_fetch_retry(&tx->stats.syncp, + start)); + + tot->tx_packets += packets; + tot->tx_bytes += bytes; + } + } +} + +static int eea_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct eea_net *enet = netdev_priv(netdev); + + cmd->base.speed = enet->speed; + cmd->base.duplex = enet->duplex; + cmd->base.port = PORT_OTHER; + + return 0; +} + +const struct ethtool_ops eea_ethtool_ops = { + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, + .get_drvinfo = eea_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = eea_get_ringparam, + .set_ringparam = eea_set_ringparam, + .set_channels = eea_set_channels, + .get_channels = eea_get_channels, + .get_strings = eea_get_strings, + .get_sset_count = eea_get_sset_count, + .get_ethtool_stats = eea_get_ethtool_stats, + .get_link_ksettings = eea_get_link_ksettings, +}; diff --git a/drivers/net/ethernet/alibaba/eea/eea_ethtool.h b/drivers/net/ethernet/alibaba/eea/eea_ethtool.h new file mode 100644 index 0000000000000..1ee89b49addd2 --- /dev/null +++ b/drivers/net/ethernet/alibaba/eea/eea_ethtool.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: 
GPL-2.0-or-later */ +/* + * Driver for Alibaba Elastic Ethernet Adapter. + * + * Copyright (C) 2025 Alibaba Inc. + */ + +#ifndef __EEA_ETHTOOL_H__ +#define __EEA_ETHTOOL_H__ + +struct eea_tx_stats { + struct u64_stats_sync syncp; + u64_stats_t descs; + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t drops; + u64_stats_t kicks; +}; + +struct eea_rx_ctx_stats { + u64 descs; + u64 packets; + u64 bytes; + u64 drops; + u64 split_hdr_bytes; + u64 split_hdr_packets; + + u64 length_errors; +}; + +struct eea_rx_stats { + struct u64_stats_sync syncp; + u64_stats_t descs; + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t drops; + u64_stats_t kicks; + u64_stats_t split_hdr_bytes; + u64_stats_t split_hdr_packets; + + u64_stats_t length_errors; +}; + +void eea_update_rx_stats(struct eea_rx_stats *rx_stats, + struct eea_rx_ctx_stats *stats); +void eea_stats(struct net_device *netdev, struct rtnl_link_stats64 *tot); + +extern const struct ethtool_ops eea_ethtool_ops; + +#endif diff --git a/drivers/net/ethernet/alibaba/eea/eea_net.c b/drivers/net/ethernet/alibaba/eea/eea_net.c index dc5895f5d4954..397a055337743 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_net.c +++ b/drivers/net/ethernet/alibaba/eea/eea_net.c @@ -437,6 +437,7 @@ static const struct net_device_ops eea_netdev = { .ndo_stop = eea_netdev_stop, .ndo_start_xmit = eea_tx_xmit, .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = eea_stats, .ndo_features_check = passthru_features_check, .ndo_tx_timeout = eea_tx_timeout, }; @@ -454,6 +455,7 @@ static struct eea_net *eea_netdev_alloc(struct eea_device *edev, u32 pairs) } netdev->netdev_ops = &eea_netdev; + netdev->ethtool_ops = &eea_ethtool_ops; SET_NETDEV_DEV(netdev, edev->dma_dev); enet = netdev_priv(netdev); diff --git a/drivers/net/ethernet/alibaba/eea/eea_net.h b/drivers/net/ethernet/alibaba/eea/eea_net.h index b451f6765480c..d98a1a94d86e5 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_net.h +++ 
b/drivers/net/ethernet/alibaba/eea/eea_net.h @@ -12,6 +12,7 @@ #include #include "eea_adminq.h" +#include "eea_ethtool.h" #include "eea_ring.h" #define EEA_VER_MAJOR 1 @@ -38,6 +39,8 @@ struct eea_net_tx { u32 index; char name[16]; + + struct eea_tx_stats stats; }; struct eea_rx_meta { @@ -90,6 +93,8 @@ struct eea_net_rx { struct napi_struct napi; + struct eea_rx_stats stats; + u16 irq_n; char name[16]; diff --git a/drivers/net/ethernet/alibaba/eea/eea_rx.c b/drivers/net/ethernet/alibaba/eea/eea_rx.c index 4a802cf87de02..e0a0a9e29e991 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_rx.c +++ b/drivers/net/ethernet/alibaba/eea/eea_rx.c @@ -32,6 +32,8 @@ struct eea_rx_ctx { u32 frame_sz; struct eea_rx_meta *meta; + + struct eea_rx_ctx_stats stats; }; static struct eea_rx_meta *eea_rx_meta_get(struct eea_net_rx *rx) @@ -199,6 +201,7 @@ static int eea_harden_check_overflow(struct eea_rx_ctx *ctx, pr_debug("%s: rx error: len %u exceeds truesize %u\n", enet->netdev->name, ctx->len, ctx->meta->truesize - ctx->meta->room); + ++ctx->stats.length_errors; return -EINVAL; } @@ -215,6 +218,7 @@ static int eea_harden_check_size(struct eea_rx_ctx *ctx, struct eea_net *enet) if (unlikely(ctx->hdr_len + ctx->len < ETH_HLEN)) { pr_debug("%s: short packet %u\n", enet->netdev->name, ctx->len); + ++ctx->stats.length_errors; return -EINVAL; } @@ -356,6 +360,7 @@ static int process_remain_buf(struct eea_net_rx *rx, struct eea_rx_ctx *ctx) err: dev_kfree_skb(rx->pkt.head_skb); + ++ctx->stats.drops; rx->pkt.do_drop = true; rx->pkt.head_skb = NULL; return 0; @@ -384,6 +389,7 @@ static int process_first_buf(struct eea_net_rx *rx, struct eea_rx_ctx *ctx) return 0; err: + ++ctx->stats.drops; rx->pkt.do_drop = true; return 0; } @@ -415,9 +421,12 @@ static void eea_rx_desc_to_ctx(struct eea_net_rx *rx, ctx->flags = le16_to_cpu(desc->flags); ctx->hdr_len = 0; - if (ctx->flags & EEA_DESC_F_SPLIT_HDR) + if (ctx->flags & EEA_DESC_F_SPLIT_HDR) { ctx->hdr_len = le16_to_cpu(desc->len_ex) & 
EEA_RX_CDESC_HDR_LEN_MASK; + ctx->stats.split_hdr_bytes += ctx->hdr_len; + ++ctx->stats.split_hdr_packets; + } ctx->more = ctx->flags & EEA_RING_DESC_F_MORE; } @@ -445,6 +454,8 @@ static int eea_cleanrx(struct eea_net_rx *rx, int budget, eea_rx_meta_dma_sync_for_cpu(rx, meta, ctx->len); + ctx->stats.bytes += ctx->len; + if (!rx->pkt.idx) process_first_buf(rx, ctx); else @@ -462,17 +473,20 @@ static int eea_cleanrx(struct eea_net_rx *rx, int budget, skip: eea_rx_meta_put(rx, meta); ering_cq_ack_desc(rx->ering, 1); + ++ctx->stats.descs; if (!ctx->more) memset(&rx->pkt, 0, sizeof(rx->pkt)); } + ctx->stats.packets = packets; + return packets; } static bool eea_rx_post(struct eea_net *enet, struct eea_net_rx *rx) { - u32 tailroom, headroom, room, len; + u32 tailroom, headroom, room, flags, len; struct eea_rx_meta *meta; struct eea_rx_desc *desc; int err = 0, num = 0; @@ -512,9 +526,14 @@ static bool eea_rx_post(struct eea_net *enet, struct eea_net_rx *rx) ++num; } - if (num) + if (num) { ering_kick(rx->ering); + flags = u64_stats_update_begin_irqsave(&rx->stats.syncp); + u64_stats_inc(&rx->stats.kicks); + u64_stats_update_end_irqrestore(&rx->stats.syncp, flags); + } + /* true means busy, napi should be called again. 
*/ return !!err; } @@ -535,6 +554,8 @@ int eea_poll(struct napi_struct *napi, int budget) if (rx->ering->num_free > budget) busy |= eea_rx_post(enet, rx); + eea_update_rx_stats(&rx->stats, &ctx.stats); + busy |= received >= budget; if (!busy) { @@ -720,6 +741,8 @@ struct eea_net_rx *eea_alloc_rx(struct eea_net_init_ctx *ctx, u32 idx) rx->index = idx; sprintf(rx->name, "rx.%u", idx); + u64_stats_init(&rx->stats.syncp); + /* ering */ ering = ering_alloc(idx * 2, ctx->cfg.rx_ring_depth, ctx->edev, ctx->cfg.rx_sq_desc_size, diff --git a/drivers/net/ethernet/alibaba/eea/eea_tx.c b/drivers/net/ethernet/alibaba/eea/eea_tx.c index a9c4cd0d75ffb..acee33add96a7 100644 --- a/drivers/net/ethernet/alibaba/eea/eea_tx.c +++ b/drivers/net/ethernet/alibaba/eea/eea_tx.c @@ -114,6 +114,13 @@ static u32 eea_clean_tx(struct eea_net_tx *tx) eea_tx_meta_put_and_unmap(tx, meta); } + if (stats.packets) { + u64_stats_update_begin(&tx->stats.syncp); + u64_stats_add(&tx->stats.bytes, stats.bytes); + u64_stats_add(&tx->stats.packets, stats.packets); + u64_stats_update_end(&tx->stats.syncp); + } + return stats.packets; } @@ -247,6 +254,10 @@ static int eea_tx_post_skb(struct eea_net_tx *tx, struct sk_buff *skb) meta->num = shinfo->nr_frags + 1; ering_sq_commit_desc(tx->ering); + u64_stats_update_begin(&tx->stats.syncp); + u64_stats_add(&tx->stats.descs, meta->num); + u64_stats_update_end(&tx->stats.syncp); + return 0; err_cancel: @@ -258,6 +269,10 @@ static int eea_tx_post_skb(struct eea_net_tx *tx, struct sk_buff *skb) static void eea_tx_kick(struct eea_net_tx *tx) { ering_kick(tx->ering); + + u64_stats_update_begin(&tx->stats.syncp); + u64_stats_inc(&tx->stats.kicks); + u64_stats_update_end(&tx->stats.syncp); } netdev_tx_t eea_tx_xmit(struct sk_buff *skb, struct net_device *netdev) @@ -282,8 +297,13 @@ netdev_tx_t eea_tx_xmit(struct sk_buff *skb, struct net_device *netdev) skb_tx_timestamp(skb); err = eea_tx_post_skb(tx, skb); - if (unlikely(err)) + if (unlikely(err)) { + 
u64_stats_update_begin(&tx->stats.syncp); + u64_stats_inc(&tx->stats.drops); + u64_stats_update_end(&tx->stats.syncp); + dev_kfree_skb_any(skb); + } if (!netdev_xmit_more() || netif_xmit_stopped(txq)) eea_tx_kick(tx); @@ -346,6 +366,8 @@ int eea_alloc_tx(struct eea_net_init_ctx *ctx, struct eea_net_tx *tx, u32 idx) struct eea_ring *ering; u32 i; + u64_stats_init(&tx->stats.syncp); + sprintf(tx->name, "tx.%u", idx); ering = ering_alloc(idx * 2 + 1, ctx->cfg.tx_ring_depth, ctx->edev, From cb55fbe15a96efb1c60c030991b739e24615265b Mon Sep 17 00:00:00 2001 From: Florian Fuchs Date: Mon, 10 Nov 2025 12:45:23 +0100 Subject: [PATCH 775/867] net: ps3_gelic_net: handle skb allocation failures Handle skb allocation failures in RX path, to avoid NULL pointer dereference and RX stalls under memory pressure. If the refill fails with -ENOMEM, complete napi polling and wake up later to retry via timer. Also explicitly re-enable RX DMA after oom, so the dmac doesn't remain stopped in this situation. Previously, memory pressure could lead to skb allocation failures and subsequent Oops like: Oops: Kernel access of bad area, sig: 11 [#2] Hardware name: SonyPS3 Cell Broadband Engine 0x701000 PS3 NIP [c0003d0000065900] gelic_net_poll+0x6c/0x2d0 [ps3_gelic] (unreliable) LR [c0003d00000659c4] gelic_net_poll+0x130/0x2d0 [ps3_gelic] Call Trace: gelic_net_poll+0x130/0x2d0 [ps3_gelic] (unreliable) __napi_poll+0x44/0x168 net_rx_action+0x178/0x290 Steps to reproduce the issue: 1. Start a continuous network traffic, like scp of a 20GB file 2. Inject failslab errors using the kernel fault injection: echo -1 > /sys/kernel/debug/failslab/times echo 30 > /sys/kernel/debug/failslab/interval echo 100 > /sys/kernel/debug/failslab/probability 3. After some time, traces start to appear, kernel Oopses and the system stops Step 2 is not always necessary, as it is usually already triggered by the transfer of a big enough file. 
Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3") Signed-off-by: Florian Fuchs Signed-off-by: NipaLocal --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 54 +++++++++++++++----- drivers/net/ethernet/toshiba/ps3_gelic_net.h | 1 + 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 5ee8e8980393c..a8121f7583f9c 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -259,6 +259,7 @@ void gelic_card_down(struct gelic_card *card) mutex_lock(&card->updown_lock); if (atomic_dec_if_positive(&card->users) == 0) { pr_debug("%s: real do\n", __func__); + timer_delete_sync(&card->rx_oom_timer); napi_disable(&card->napi); /* * Disable irq. Wireless interrupts will @@ -970,7 +971,8 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, * gelic_card_decode_one_descr - processes an rx descriptor * @card: card structure * - * returns 1 if a packet has been sent to the stack, otherwise 0 + * returns 1 if a packet has been sent to the stack, -ENOMEM on skb alloc + * failure, otherwise 0 * * processes an rx descriptor by iommu-unmapping the data buffer and passing * the packet up to the stack @@ -981,16 +983,17 @@ static int gelic_card_decode_one_descr(struct gelic_card *card) struct gelic_descr_chain *chain = &card->rx_chain; struct gelic_descr *descr = chain->head; struct net_device *netdev = NULL; - int dmac_chain_ended; + int dmac_chain_ended = 0; status = gelic_descr_get_status(descr); if (status == GELIC_DESCR_DMA_CARDOWNED) return 0; - if (status == GELIC_DESCR_DMA_NOT_IN_USE) { + if (status == GELIC_DESCR_DMA_NOT_IN_USE || !descr->skb) { dev_dbg(ctodev(card), "dormant descr? 
%p\n", descr); - return 0; + dmac_chain_ended = 1; + goto refill; } /* netdevice select */ @@ -1048,9 +1051,10 @@ static int gelic_card_decode_one_descr(struct gelic_card *card) refill: /* is the current descriptor terminated with next_descr == NULL? */ - dmac_chain_ended = - be32_to_cpu(descr->hw_regs.dmac_cmd_status) & - GELIC_DESCR_RX_DMA_CHAIN_END; + if (!dmac_chain_ended) + dmac_chain_ended = + be32_to_cpu(descr->hw_regs.dmac_cmd_status) & + GELIC_DESCR_RX_DMA_CHAIN_END; /* * So that always DMAC can see the end * of the descriptor chain to avoid @@ -1062,10 +1066,12 @@ static int gelic_card_decode_one_descr(struct gelic_card *card) gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE); /* - * this call can fail, but for now, just leave this - * descriptor without skb + * this call can fail, propagate the error */ - gelic_descr_prepare_rx(card, descr); + int ret = gelic_descr_prepare_rx(card, descr); + + if (ret) + return ret; chain->tail = descr; chain->head = descr->next; @@ -1087,6 +1093,17 @@ static int gelic_card_decode_one_descr(struct gelic_card *card) return 1; } +/** + * gelic_rx_oom_timer - Restart napi poll if oom occurred + * @t: timer list + */ +static void gelic_rx_oom_timer(struct timer_list *t) +{ + struct gelic_card *card = timer_container_of(card, t, rx_oom_timer); + + napi_schedule(&card->napi); +} + /** * gelic_net_poll - NAPI poll function called by the stack to return packets * @napi: napi structure @@ -1099,12 +1116,21 @@ static int gelic_net_poll(struct napi_struct *napi, int budget) { struct gelic_card *card = container_of(napi, struct gelic_card, napi); int packets_done = 0; + int work_result = 0; while (packets_done < budget) { - if (!gelic_card_decode_one_descr(card)) - break; + work_result = gelic_card_decode_one_descr(card); + if (work_result == 1) { + packets_done++; + continue; + } + break; + } - packets_done++; + if (work_result == -ENOMEM) { + napi_complete_done(napi, packets_done); + mod_timer(&card->rx_oom_timer, jiffies 
+ 1); + return packets_done; } if (packets_done < budget) { @@ -1576,6 +1602,8 @@ static struct gelic_card *gelic_alloc_card_net(struct net_device **netdev) mutex_init(&card->updown_lock); atomic_set(&card->users, 0); + timer_setup(&card->rx_oom_timer, gelic_rx_oom_timer, 0); + return card; } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index f7d7931e51b77..c10f1984a5a11 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -268,6 +268,7 @@ struct gelic_vlan_id { struct gelic_card { struct napi_struct napi; struct net_device *netdev[GELIC_PORT_MAX]; + struct timer_list rx_oom_timer; /* * hypervisor requires irq_status should be * 8 bytes aligned, but u64 member is From 0ab7cc83bf508b74c806018036c34de6eae6880d Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Mon, 10 Nov 2025 13:42:53 +0100 Subject: [PATCH 776/867] net: sparx5/lan969x: populate netdev of_node Populate of_node for the port netdevs, to make the individual ports of_nodes available in sysfs. 
Signed-off-by: Robert Marko Signed-off-by: NipaLocal --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 40b1bfc600a79..582145713cfd1 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -395,6 +395,8 @@ static int sparx5_create_port(struct sparx5 *sparx5, spx5_port->phylink = phylink; + spx5_port->ndev->dev.of_node = spx5_port->of_node; + return 0; } From 7a805ef2f065526ef5998d24b8e14663e1feed04 Mon Sep 17 00:00:00 2001 From: Ilya Krutskih Date: Mon, 10 Nov 2025 13:44:22 +0000 Subject: [PATCH 777/867] net: fealnx: fixed possible out of bounds access to an array Fixed a possible out-of-bounds access to an array if the fealnx_init_one() function is called more than MAX_UNITS times or card_idx is less than zero. Added a check: 0 <= card_idx < MAX_UNITS. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Ilya Krutskih Signed-off-by: NipaLocal --- drivers/net/ethernet/fealnx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 3c9961806f756..2b731fcb7bc02 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -491,8 +491,8 @@ static int fealnx_init_one(struct pci_dev *pdev, card_idx++; sprintf(boardname, "fealnx%d", card_idx); - - option = card_idx < MAX_UNITS ? options[card_idx] : 0; + if (card_idx >= 0) + option = card_idx < MAX_UNITS ? 
options[card_idx] : 0; i = pci_enable_device(pdev); if (i) return i; @@ -623,7 +623,7 @@ static int fealnx_init_one(struct pci_dev *pdev, np->default_port = option & 15; } - if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0) + if ((0 <= card_idx && MAX_UNITS > card_idx) && full_duplex[card_idx] > 0) np->mii.full_duplex = full_duplex[card_idx]; if (np->mii.full_duplex) { From 27d0f445bb5379f3d8b1f9b91d5195217ebaa614 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 10 Nov 2025 14:42:43 +0000 Subject: [PATCH 778/867] net: stmmac: meson8b: use PHY_INTF_SEL_x Use PHY_INTF_SEL_x definitions for phy_intf_sel bitfield. Reviewed-by: Martin Blumenstingl Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index a50782994b979..f485b9b858bf6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -26,8 +26,8 @@ #define PRG_ETH0_RGMII_MODE BIT(0) #define PRG_ETH0_EXT_PHY_MODE_MASK GENMASK(2, 0) -#define PRG_ETH0_EXT_RGMII_MODE 1 -#define PRG_ETH0_EXT_RMII_MODE 4 +#define PRG_ETH0_EXT_RGMII_MODE PHY_INTF_SEL_RGMII +#define PRG_ETH0_EXT_RMII_MODE PHY_INTF_SEL_RMII /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */ #define PRG_ETH0_CLK_M250_SEL_MASK GENMASK(4, 4) From 84f8f0b59175e2c63b318b8a82615d133b8ed538 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 10 Nov 2025 14:42:48 +0000 Subject: [PATCH 779/867] net: stmmac: meson8b: use phy_intf_sel directly Rearrange meson_axg_set_phy_mode() to use phy_intf_sel directly, converting it to the register field for meson8b_dwmac_mask_bits(). 
Reviewed-by: Martin Blumenstingl Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index f485b9b858bf6..865cd61661342 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -26,8 +26,6 @@ #define PRG_ETH0_RGMII_MODE BIT(0) #define PRG_ETH0_EXT_PHY_MODE_MASK GENMASK(2, 0) -#define PRG_ETH0_EXT_RGMII_MODE PHY_INTF_SEL_RGMII -#define PRG_ETH0_EXT_RMII_MODE PHY_INTF_SEL_RMII /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */ #define PRG_ETH0_CLK_M250_SEL_MASK GENMASK(4, 4) @@ -238,21 +236,19 @@ static int meson8b_set_phy_mode(struct meson8b_dwmac *dwmac) static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac) { + int phy_intf_sel; + switch (dwmac->phy_mode) { case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: /* enable RGMII mode */ - meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, - PRG_ETH0_EXT_PHY_MODE_MASK, - PRG_ETH0_EXT_RGMII_MODE); + phy_intf_sel = PHY_INTF_SEL_RGMII; break; case PHY_INTERFACE_MODE_RMII: /* disable RGMII mode -> enables RMII mode */ - meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, - PRG_ETH0_EXT_PHY_MODE_MASK, - PRG_ETH0_EXT_RMII_MODE); + phy_intf_sel = PHY_INTF_SEL_RMII; break; default: dev_err(dwmac->dev, "fail to set phy-mode %s\n", @@ -260,6 +256,10 @@ static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac) return -EINVAL; } + meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_EXT_PHY_MODE_MASK, + FIELD_PREP(PRG_ETH0_EXT_PHY_MODE_MASK, + phy_intf_sel)); + return 0; } From 325961b4221f2a8009b19a3ec24d850fc7c31eab Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 10 Nov 2025 14:42:53 +0000 Subject: [PATCH 
780/867] net: stmmac: meson8b: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result and use that to set the control register to select the operating mode for the DWMAC core. Reviewed-by: Martin Blumenstingl Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../ethernet/stmicro/stmmac/dwmac-meson8b.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 865cd61661342..e4d5c41294f4e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -238,22 +238,12 @@ static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac) { int phy_intf_sel; - switch (dwmac->phy_mode) { - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_TXID: - /* enable RGMII mode */ - phy_intf_sel = PHY_INTF_SEL_RGMII; - break; - case PHY_INTERFACE_MODE_RMII: - /* disable RGMII mode -> enables RMII mode */ - phy_intf_sel = PHY_INTF_SEL_RMII; - break; - default: + phy_intf_sel = stmmac_get_phy_intf_sel(dwmac->phy_mode); + if (phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) { dev_err(dwmac->dev, "fail to set phy-mode %s\n", phy_modes(dwmac->phy_mode)); - return -EINVAL; + return phy_intf_sel < 0 ? phy_intf_sel : -EINVAL; } meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_EXT_PHY_MODE_MASK, From 304e9bf499cffcfc21e5cec0991a5d4e7fdb0885 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:00:59 -0800 Subject: [PATCH 781/867] net: phy: Add support for 25, 50 and 100Gbps PMA to genphy_c45_read_pma Add support for reading 25, 50, and 100G from the PMA interface for a C45 device. 
By doing this we enable support for future devices that support higher speeds than the current limit of 10G. Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/phy/phy-c45.c | 9 +++++++++ include/uapi/linux/mdio.h | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index e8e5be4684ab9..1d747fbaa10c0 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -627,6 +627,15 @@ int genphy_c45_read_pma(struct phy_device *phydev) case MDIO_CTRL1_SPEED10G: phydev->speed = SPEED_10000; break; + case MDIO_PMA_CTRL1_SPEED25G: + phydev->speed = SPEED_25000; + break; + case MDIO_PMA_CTRL1_SPEED50G: + phydev->speed = SPEED_50000; + break; + case MDIO_PMA_CTRL1_SPEED100G: + phydev->speed = SPEED_100000; + break; default: phydev->speed = SPEED_UNKNOWN; break; diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 6975f182b22c6..75ed41fc46c66 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -116,6 +116,12 @@ #define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00) /* 10PASS-TS/2BASE-TL */ #define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) +/* 100 Gb/s */ +#define MDIO_PMA_CTRL1_SPEED100G (MDIO_CTRL1_SPEEDSELEXT | 0x0c) +/* 25 Gb/s */ +#define MDIO_PMA_CTRL1_SPEED25G (MDIO_CTRL1_SPEEDSELEXT | 0x10) +/* 50 Gb/s */ +#define MDIO_PMA_CTRL1_SPEED50G (MDIO_CTRL1_SPEEDSELEXT | 0x14) /* 2.5 Gb/s */ #define MDIO_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 5 Gb/s */ From 1334603219af99e480c85f60a1221f5978cd6601 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:10 -0800 Subject: [PATCH 782/867] net: phy: Rename MDIO_CTRL1_SPEED for 2.5G and 5G to reflect PMA values The 2.5G and 5G values are not consistent between the PCS CTRL1 and PMA CTRL1 values. In order to avoid confusion between the two I am updating the values to include "PMA" in the name similar to values used in similar places. 
Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/phy/phy-c45.c | 8 ++++---- include/uapi/linux/mdio.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 1d747fbaa10c0..d161fe3fee75b 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -148,12 +148,12 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) ctrl2 |= MDIO_PMA_CTRL2_1000BT; break; case SPEED_2500: - ctrl1 |= MDIO_CTRL1_SPEED2_5G; + ctrl1 |= MDIO_PMA_CTRL1_SPEED2_5G; /* Assume 2.5Gbase-T */ ctrl2 |= MDIO_PMA_CTRL2_2_5GBT; break; case SPEED_5000: - ctrl1 |= MDIO_CTRL1_SPEED5G; + ctrl1 |= MDIO_PMA_CTRL1_SPEED5G; /* Assume 5Gbase-T */ ctrl2 |= MDIO_PMA_CTRL2_5GBT; break; @@ -618,10 +618,10 @@ int genphy_c45_read_pma(struct phy_device *phydev) case MDIO_PMA_CTRL1_SPEED1000: phydev->speed = SPEED_1000; break; - case MDIO_CTRL1_SPEED2_5G: + case MDIO_PMA_CTRL1_SPEED2_5G: phydev->speed = SPEED_2500; break; - case MDIO_CTRL1_SPEED5G: + case MDIO_PMA_CTRL1_SPEED5G: phydev->speed = SPEED_5000; break; case MDIO_CTRL1_SPEED10G: diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 75ed41fc46c66..c33aa864ef664 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -123,9 +123,9 @@ /* 50 Gb/s */ #define MDIO_PMA_CTRL1_SPEED50G (MDIO_CTRL1_SPEEDSELEXT | 0x14) /* 2.5 Gb/s */ -#define MDIO_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) +#define MDIO_PMA_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 5 Gb/s */ -#define MDIO_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) +#define MDIO_PMA_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) /* Status register 1. 
*/ #define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ From ab5db147c45ce98ec487025dfe016588b9aed834 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:18 -0800 Subject: [PATCH 783/867] net: pcs: xpcs: Add support for 25G, 50G, and 100G interfaces With this change we are adding support for 25G, 50G, and 100G interface types to the XPCS driver. This had supposedly been enabled with the addition of XLGMII but I don't see any capability for configuration there so I suspect it may need to be refactored in the future. With this change we can enable the XPCS driver with the selected interface and it should be able to detect link, speed, and report the link status to the phylink interface. Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/pcs/pcs-xpcs.c | 105 +++++++++++++++++++++++++++++++++++-- include/uapi/linux/mdio.h | 3 ++ 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 3d1bd5aac0937..b33767c7b45c0 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -37,6 +37,16 @@ static const int xpcs_10gkr_features[] = { __ETHTOOL_LINK_MODE_MASK_NBITS, }; +static const int xpcs_25gbaser_features[] = { + ETHTOOL_LINK_MODE_MII_BIT, + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + static const int xpcs_xlgmii_features[] = { ETHTOOL_LINK_MODE_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -67,6 +77,40 @@ static const int xpcs_xlgmii_features[] = { __ETHTOOL_LINK_MODE_MASK_NBITS, }; +static const int xpcs_50gbaser_features[] = { + ETHTOOL_LINK_MODE_MII_BIT, + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + 
ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + +static const int xpcs_50gbaser2_features[] = { + ETHTOOL_LINK_MODE_MII_BIT, + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + +static const int xpcs_100gbasep_features[] = { + ETHTOOL_LINK_MODE_MII_BIT, + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + static const int xpcs_10gbaser_features[] = { ETHTOOL_LINK_MODE_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -523,9 +567,38 @@ static int xpcs_get_max_xlgmii_speed(struct dw_xpcs *xpcs, return speed; } -static void xpcs_resolve_pma(struct dw_xpcs *xpcs, - struct phylink_link_state *state) +static int xpcs_c45_read_pcs_speed(struct dw_xpcs *xpcs, + struct phylink_link_state *state) { + int pcs_ctrl1; + + pcs_ctrl1 = xpcs_read(xpcs, MDIO_MMD_PCS, MDIO_CTRL1); + if (pcs_ctrl1 < 0) + return pcs_ctrl1; + + switch (pcs_ctrl1 & MDIO_CTRL1_SPEEDSEL) { + case MDIO_PCS_CTRL1_SPEED25G: + state->speed = SPEED_25000; + break; + case MDIO_PCS_CTRL1_SPEED50G: + state->speed = SPEED_50000; + break; + case MDIO_PCS_CTRL1_SPEED100G: + state->speed = SPEED_100000; + break; + default: + state->speed = SPEED_UNKNOWN; + break; + } + + return 0; +} + +static int xpcs_resolve_pma(struct dw_xpcs *xpcs, + struct phylink_link_state *state) +{ + int err = 0; + state->pause = MLO_PAUSE_TX | MLO_PAUSE_RX; state->duplex = DUPLEX_FULL; @@ -536,10 +609,18 @@ static void xpcs_resolve_pma(struct dw_xpcs *xpcs, case PHY_INTERFACE_MODE_XLGMII: state->speed = 
xpcs_get_max_xlgmii_speed(xpcs, state); break; + case PHY_INTERFACE_MODE_25GBASER: + case PHY_INTERFACE_MODE_50GBASER: + case PHY_INTERFACE_MODE_LAUI: + case PHY_INTERFACE_MODE_100GBASEP: + err = xpcs_c45_read_pcs_speed(xpcs, state); + break; default: state->speed = SPEED_UNKNOWN; break; } + + return err; } static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported, @@ -945,10 +1026,10 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs, phylink_resolve_c73(state); } else { - xpcs_resolve_pma(xpcs, state); + ret = xpcs_resolve_pma(xpcs, state); } - return 0; + return ret; } static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, @@ -1312,10 +1393,26 @@ static const struct dw_xpcs_compat synopsys_xpcs_compat[] = { .interface = PHY_INTERFACE_MODE_10GKR, .supported = xpcs_10gkr_features, .an_mode = DW_AN_C73, + }, { + .interface = PHY_INTERFACE_MODE_25GBASER, + .supported = xpcs_25gbaser_features, + .an_mode = DW_AN_C73, }, { .interface = PHY_INTERFACE_MODE_XLGMII, .supported = xpcs_xlgmii_features, .an_mode = DW_AN_C73, + }, { + .interface = PHY_INTERFACE_MODE_50GBASER, + .supported = xpcs_50gbaser_features, + .an_mode = DW_AN_C73, + }, { + .interface = PHY_INTERFACE_MODE_LAUI, + .supported = xpcs_50gbaser2_features, + .an_mode = DW_AN_C73, + }, { + .interface = PHY_INTERFACE_MODE_100GBASEP, + .supported = xpcs_100gbasep_features, + .an_mode = DW_AN_C73, }, { .interface = PHY_INTERFACE_MODE_10GBASER, .supported = xpcs_10gbaser_features, diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index c33aa864ef664..2da509c9c0a58 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -118,10 +118,13 @@ #define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) /* 100 Gb/s */ #define MDIO_PMA_CTRL1_SPEED100G (MDIO_CTRL1_SPEEDSELEXT | 0x0c) +#define MDIO_PCS_CTRL1_SPEED100G (MDIO_CTRL1_SPEEDSELEXT | 0x10) /* 25 Gb/s */ #define MDIO_PMA_CTRL1_SPEED25G (MDIO_CTRL1_SPEEDSELEXT | 0x10) +#define MDIO_PCS_CTRL1_SPEED25G 
(MDIO_CTRL1_SPEEDSELEXT | 0x14) /* 50 Gb/s */ #define MDIO_PMA_CTRL1_SPEED50G (MDIO_CTRL1_SPEEDSELEXT | 0x14) +#define MDIO_PCS_CTRL1_SPEED50G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 2.5 Gb/s */ #define MDIO_PMA_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 5 Gb/s */ From 69f796fdd1f60fd72dd7fe910e8cf1cf02dc0702 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:26 -0800 Subject: [PATCH 784/867] net: pcs: xpcs: Fix PMA identifier handling in XPCS The XPCS driver was mangling the PMA identifier as the original code appears to have been focused on just capturing the OUI. Rather than store a mangled ID it is better to work with the actual PMA ID and instead just mask out the values that don't apply rather than shifting them and reordering them as you still don't get the original OUI for the NIC without having to bitswap the values as per the definition of the layout in IEEE 802.3-2022 22.2.4.3.1. By laying it out as it was in the hardware it is also less likely for us to have an unintentional collision as the enum values will occupy the revision number area while the OUI occupies the upper 22 bits. Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/pcs/pcs-xpcs.c | 9 ++++----- include/linux/pcs/pcs-xpcs.h | 2 +- include/uapi/linux/mdio.h | 5 +++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index b33767c7b45c0..8b5b5b63b74b7 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1365,17 +1365,16 @@ static int xpcs_read_ids(struct dw_xpcs *xpcs) if (ret < 0) return ret; - id = ret; + id = ret << 16; ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID2); if (ret < 0) return ret; - /* Note the inverted dword order and masked out Model/Revision numbers - * with respect to what is done with the PCS ID... + /* For now we only record the OUI for the PMAPMD, we may want to + * add the model number at some point in the future. 
*/ - ret = (ret >> 10) & 0x3F; - id |= ret << 16; + id |= ret & MDIO_DEVID2_OUI; /* Set the PMA ID if it hasn't been pre-initialized */ if (xpcs->info.pma == DW_XPCS_PMA_ID_NATIVE) diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index e40f554ff717a..4cf6bd611e5ac 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -38,7 +38,7 @@ enum dw_xpcs_pma_id { DW_XPCS_PMA_GEN4_6G_ID, DW_XPCS_PMA_GEN5_10G_ID, DW_XPCS_PMA_GEN5_12G_ID, - WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80, + WX_TXGBE_XPCS_PMA_10G_ID = 0xfc806000, }; struct dw_xpcs_info { diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 2da509c9c0a58..b287f84036a55 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -142,6 +142,11 @@ #define MDIO_AN_STAT1_PAGE 0x0040 /* Page received */ #define MDIO_AN_STAT1_XNP 0x0080 /* Extended next page status */ +/* Device Identifier 2 */ +#define MDIO_DEVID2_OUI 0xfc00 /* OUI Portion of PHY ID */ +#define MDIO_DEVID2_MODEL_NUM 0x03f0 /* Manufacturer's Model Number */ +#define MDIO_DEVID2_REV_NUM 0x000f /* Revision Number */ + /* Speed register. */ #define MDIO_SPEED_10G 0x0001 /* 10G capable */ #define MDIO_PMA_SPEED_2B 0x0002 /* 2BASE-TL capable */ From 58c2da24a768c314294cb171143e9af0ee04f3c1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:33 -0800 Subject: [PATCH 785/867] net: pcs: xpcs: Add support for FBNIC 25G, 50G, 100G PMA The fbnic driver is planning to make use of the XPCS driver to enable support for PCS and better integration with phylink. To do this though we will need to enable several workarounds since the PMA/PMD interface for fbnic is likely to be unique since it is a mix of two different vendor products with a unique wrapper around the IP. As such I have generated a PHY identifier based on IEEE 802.3-2022 22.2.4.3.1 using the OUI belonging to Meta Platforms and used with our NICs. 
Using this we will provide it as the PHY ID via the SW based MDIO interface so that the fbnic device can be identified and necessary workarounds enabled in the XPCS driver. As an initial workaround this change adds an exception so that soft_reset is not set when the driver is initially bound to the PCS. In addition I have added logic to integrate the PMA link state into the link state for the PCS. With this we can avoid the link coming up too soon on the FBNIC PHY and as a result we can avoid link flaps. Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/pcs/pcs-xpcs.c | 23 +++++++++++++++++++++-- include/linux/pcs/pcs-xpcs.h | 2 ++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 8b5b5b63b74b7..69a6c03fd9e79 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -597,7 +597,25 @@ static int xpcs_c45_read_pcs_speed(struct dw_xpcs *xpcs, static int xpcs_resolve_pma(struct dw_xpcs *xpcs, struct phylink_link_state *state) { - int err = 0; + int pma_stat1, err = 0; + + /* The Meta Platforms FBNIC PMD will go into a training state for + * about 4 seconds when the link first comes up. During this time the + * PCS link will bounce. To avoid reporting link up too soon we include + * the PMA/PMD state provided by the driver. 
+ */ + if (xpcs->info.pma == MP_FBNIC_XPCS_PMA_100G_ID) { + pma_stat1 = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_STAT1); + if (pma_stat1 < 0) { + state->link = false; + return pma_stat1; + } + + if (!(pma_stat1 & MDIO_STAT1_LSTATUS)) { + state->link = false; + return 0; + } + } state->pause = MLO_PAUSE_TX | MLO_PAUSE_RX; state->duplex = DUPLEX_FULL; @@ -1591,7 +1609,8 @@ static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev) xpcs_get_interfaces(xpcs, xpcs->pcs.supported_interfaces); - if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID || + xpcs->info.pma == MP_FBNIC_XPCS_PMA_100G_ID) xpcs->pcs.poll = false; else xpcs->need_reset = true; diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 4cf6bd611e5ac..36073f7b6bb40 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -39,6 +39,8 @@ enum dw_xpcs_pma_id { DW_XPCS_PMA_GEN5_10G_ID, DW_XPCS_PMA_GEN5_12G_ID, WX_TXGBE_XPCS_PMA_10G_ID = 0xfc806000, + /* Meta Platforms OUI 88:25:08, model 0, revision 0 */ + MP_FBNIC_XPCS_PMA_100G_ID = 0x46904000, }; struct dw_xpcs_info { From beb34dbda5ed81cb255a07ab954fb79f0ed354d7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:40 -0800 Subject: [PATCH 786/867] fbnic: Rename PCS IRQ to MAC IRQ as it is actually a MAC interrupt Throughout several spots in the code I had called out the IRQ as being related to the PCS. However the actual IRQ is a part of the MAC and it is just exposing PCS data. To more accurately reflect the owner of the calls this change makes it so that we rename the functions and values that are taking in the interrupt value and processing it to reflect that it is a MAC call and not a PCS one. This change is mostly motivated by the fact that we will be moving the handling of this interrupt from being PCS focused to being more PMA/PMD focused as this will drive the phydev driver that I am adding instead of driving the PCS directly. 
Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/ethernet/meta/fbnic/fbnic.h | 6 ++-- drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 30 +++++++++---------- drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 14 ++++----- drivers/net/ethernet/meta/fbnic/fbnic_mac.h | 8 ++--- .../net/ethernet/meta/fbnic/fbnic_netdev.c | 4 +-- .../net/ethernet/meta/fbnic/fbnic_phylink.c | 2 +- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index b03e5a3d51445..98929add5f21c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -34,7 +34,7 @@ struct fbnic_dev { u32 __iomem *uc_addr4; const struct fbnic_mac *mac; unsigned int fw_msix_vector; - unsigned int pcs_msix_vector; + unsigned int mac_msix_vector; unsigned short num_irqs; struct { @@ -175,8 +175,8 @@ void fbnic_fw_free_mbx(struct fbnic_dev *fbd); void fbnic_hwmon_register(struct fbnic_dev *fbd); void fbnic_hwmon_unregister(struct fbnic_dev *fbd); -int fbnic_pcs_request_irq(struct fbnic_dev *fbd); -void fbnic_pcs_free_irq(struct fbnic_dev *fbd); +int fbnic_mac_request_irq(struct fbnic_dev *fbd); +void fbnic_mac_free_irq(struct fbnic_dev *fbd); void fbnic_napi_name_irqs(struct fbnic_dev *fbd); int fbnic_napi_request_irq(struct fbnic_dev *fbd, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 1c88a2bf3a7a7..40947e142c5d0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -118,12 +118,12 @@ void fbnic_fw_free_mbx(struct fbnic_dev *fbd) fbd->fw_msix_vector = 0; } -static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) +static irqreturn_t fbnic_mac_msix_intr(int __always_unused irq, void *data) { struct fbnic_dev *fbd = data; struct fbnic_net *fbn; - if (fbd->mac->pcs_get_link_event(fbd) == FBNIC_LINK_EVENT_NONE) { + if 
(fbd->mac->get_link_event(fbd) == FBNIC_LINK_EVENT_NONE) { fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_PCS_MSIX_ENTRY); return IRQ_HANDLED; @@ -137,20 +137,20 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) } /** - * fbnic_pcs_request_irq - Configure the PCS to enable it to advertise link + * fbnic_mac_request_irq - Configure the MAC to enable it to advertise link * @fbd: Pointer to device to initialize * - * This function provides basic bringup for the MAC/PCS IRQ. For now the IRQ + * This function provides basic bringup for the MAC/PHY IRQ. For now the IRQ * will remain disabled until we start the MAC/PCS/PHY logic via phylink. * * Return: non-zero on failure. **/ -int fbnic_pcs_request_irq(struct fbnic_dev *fbd) +int fbnic_mac_request_irq(struct fbnic_dev *fbd) { struct pci_dev *pdev = to_pci_dev(fbd->dev); int vector, err; - WARN_ON(fbd->pcs_msix_vector); + WARN_ON(fbd->mac_msix_vector); vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); if (vector < 0) @@ -159,7 +159,7 @@ int fbnic_pcs_request_irq(struct fbnic_dev *fbd) /* Request the IRQ for PCS link vector. * Map PCS cause to it, and unmask it */ - err = request_irq(vector, &fbnic_pcs_msix_intr, 0, + err = request_irq(vector, &fbnic_mac_msix_intr, 0, fbd->netdev->name, fbd); if (err) return err; @@ -168,22 +168,22 @@ int fbnic_pcs_request_irq(struct fbnic_dev *fbd) fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY | FBNIC_INTR_MSIX_CTRL_ENABLE); - fbd->pcs_msix_vector = vector; + fbd->mac_msix_vector = vector; return 0; } /** - * fbnic_pcs_free_irq - Teardown the PCS IRQ to prepare for stopping + * fbnic_mac_free_irq - Teardown the MAC IRQ to prepare for stopping * @fbd: Pointer to device that is stopping * - * This function undoes the work done in fbnic_pcs_request_irq and prepares + * This function undoes the work done in fbnic_mac_request_irq and prepares * the device to no longer receive traffic on the host interface. 
**/ -void fbnic_pcs_free_irq(struct fbnic_dev *fbd) +void fbnic_mac_free_irq(struct fbnic_dev *fbd) { /* Vector has already been freed */ - if (!fbd->pcs_msix_vector) + if (!fbd->mac_msix_vector) return; /* Disable interrupt */ @@ -192,14 +192,14 @@ void fbnic_pcs_free_irq(struct fbnic_dev *fbd) fbnic_wrfl(fbd); /* Synchronize IRQ to prevent race that would unmask vector */ - synchronize_irq(fbd->pcs_msix_vector); + synchronize_irq(fbd->mac_msix_vector); /* Mask the vector */ fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_PCS_MSIX_ENTRY); /* Free the vector */ - free_irq(fbd->pcs_msix_vector, fbd); - fbd->pcs_msix_vector = 0; + free_irq(fbd->mac_msix_vector, fbd); + fbd->mac_msix_vector = 0; } void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 2a84bd1d7e268..28a2e1fd3760a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -434,14 +434,14 @@ static void fbnic_mac_tx_pause_config(struct fbnic_dev *fbd, bool tx_pause) wr32(fbd, FBNIC_RXB_PAUSE_DROP_CTRL, rxb_pause_ctrl); } -static int fbnic_pcs_get_link_event_asic(struct fbnic_dev *fbd) +static int fbnic_mac_get_link_event(struct fbnic_dev *fbd) { - u32 pcs_intr_mask = rd32(fbd, FBNIC_SIG_PCS_INTR_STS); + u32 intr_mask = rd32(fbd, FBNIC_SIG_PCS_INTR_STS); - if (pcs_intr_mask & FBNIC_SIG_PCS_INTR_LINK_DOWN) + if (intr_mask & FBNIC_SIG_PCS_INTR_LINK_DOWN) return FBNIC_LINK_EVENT_DOWN; - return (pcs_intr_mask & FBNIC_SIG_PCS_INTR_LINK_UP) ? + return (intr_mask & FBNIC_SIG_PCS_INTR_LINK_UP) ? 
FBNIC_LINK_EVENT_UP : FBNIC_LINK_EVENT_NONE; } @@ -521,7 +521,7 @@ static bool fbnic_mac_get_pcs_link_status(struct fbnic_dev *fbd) return !lane_mask; } -static bool fbnic_pcs_get_link_asic(struct fbnic_dev *fbd) +static bool fbnic_mac_get_link(struct fbnic_dev *fbd) { bool link; @@ -869,8 +869,8 @@ static const struct fbnic_mac fbnic_mac_asic = { .init_regs = fbnic_mac_init_regs, .pcs_enable = fbnic_pcs_enable_asic, .pcs_disable = fbnic_pcs_disable_asic, - .pcs_get_link = fbnic_pcs_get_link_asic, - .pcs_get_link_event = fbnic_pcs_get_link_event_asic, + .get_link = fbnic_mac_get_link, + .get_link_event = fbnic_mac_get_link_event, .get_fec_stats = fbnic_mac_get_fec_stats, .get_pcs_stats = fbnic_mac_get_pcs_stats, .get_eth_mac_stats = fbnic_mac_get_eth_mac_stats, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h index ede5ff0dae22f..414c170abcba0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h @@ -59,9 +59,9 @@ enum fbnic_sensor_id { * Configure and enable PCS to enable link if not already enabled * void (*pcs_disable)(struct fbnic_dev *fbd); * Shutdown the link if we are the only consumer of it. 
- * bool (*pcs_get_link)(struct fbnic_dev *fbd); + * bool (*get_link)(struct fbnic_dev *fbd); * Check PCS link status - * int (*pcs_get_link_event)(struct fbnic_dev *fbd) + * int (*get_link_event)(struct fbnic_dev *fbd) * Get the current link event status, reports true if link has * changed to either FBNIC_LINK_EVENT_DOWN or FBNIC_LINK_EVENT_UP * @@ -76,8 +76,8 @@ struct fbnic_mac { int (*pcs_enable)(struct fbnic_dev *fbd); void (*pcs_disable)(struct fbnic_dev *fbd); - bool (*pcs_get_link)(struct fbnic_dev *fbd); - int (*pcs_get_link_event)(struct fbnic_dev *fbd); + bool (*get_link)(struct fbnic_dev *fbd); + int (*get_link_event)(struct fbnic_dev *fbd); void (*get_fec_stats)(struct fbnic_dev *fbd, bool reset, struct fbnic_fec_stats *fec_stats); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index e95be0e7bd9e0..2d5ae89b4a154 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -44,7 +44,7 @@ int __fbnic_open(struct fbnic_net *fbn) if (err) goto time_stop; - err = fbnic_pcs_request_irq(fbd); + err = fbnic_mac_request_irq(fbd); if (err) goto time_stop; @@ -89,7 +89,7 @@ static int fbnic_stop(struct net_device *netdev) phylink_suspend(fbn->phylink, fbnic_bmc_present(fbn->fbd)); fbnic_down(fbn); - fbnic_pcs_free_irq(fbn->fbd); + fbnic_mac_free_irq(fbn->fbd); fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c index 7ce3fdd252828..3c0bd435ee287 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c @@ -132,7 +132,7 @@ fbnic_phylink_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, state->duplex = DUPLEX_FULL; - state->link = fbd->mac->pcs_get_link(fbd); + state->link = fbd->mac->get_link(fbd); } static int From 9d218bf86d8c0705428c4ffc50dce04972491cb2 
Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:48 -0800 Subject: [PATCH 787/867] fbnic: Add logic to track PMD state via MAC/PCS signals One complication with the design of our part is that the PMD doesn't provide a direct signal to the host. Instead we have visibility to signals that the PCS provides to the MAC that allow us to check the link state through that. That said we need to account for several things in the PMD and firmware when managing the link. Specifically when the link first starts to come up the PMD will cause the link to flap as the firmware will begin a training cycle when the link is first detected. As a result this will cause link flapping if we were to immediately report link up when the PCS first detects it. To address that we are adding a pmd_state variable that is meant to be a countdown of sorts indicating the state of the PMD. If the link is down the PMD will start out in the initialize state, otherwise if the link is up it will start out in the training state ready to report link up. If link is detected while in the initialize state the PMD state will switch to training, and if after 4 seconds the link is still stable we will transition to the send_data state. With this we can avoid link flapping when a cable is first connected to the NIC. One side effect of this is that we need to pull the link state away from the PCS. For now we use a union of the PCS link state register value and the pmd_state. The plan is to add a phydev driver to report the pmd_state to the phylink interface. With that we can then look at switching over to the use of the XPCS driver for fbnic instead of having an internal one. 
Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/ethernet/meta/fbnic/fbnic.h | 4 + drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 2 + drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 4 +- drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 59 +++++++++----- drivers/net/ethernet/meta/fbnic/fbnic_mac.h | 35 +++++--- .../net/ethernet/meta/fbnic/fbnic_netdev.c | 2 +- .../net/ethernet/meta/fbnic/fbnic_netdev.h | 2 +- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 14 ++-- .../net/ethernet/meta/fbnic/fbnic_phylink.c | 79 ++++++++++++++----- 9 files changed, 145 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 98929add5f21c..fac1283d0ade6 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -83,6 +83,10 @@ struct fbnic_dev { /* Last @time_high refresh time in jiffies (to catch stalls) */ unsigned long last_read; + /* PMD specific data */ + unsigned long end_of_pmd_training; + u8 pmd_state; + /* Local copy of hardware statistics */ struct fbnic_hw_stats hw_stats; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index d3a7ad921f18c..422265dc7abd6 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -787,6 +787,8 @@ enum { /* MAC PCS registers */ #define FBNIC_CSR_START_PCS 0x10000 /* CSR section delimiter */ +#define FBNIC_PCS_PAGE(n) (0x10000 + 0x400 * (n)) /* 0x40000 + 4096*n */ +#define FBNIC_PCS(reg, n) ((reg) + FBNIC_PCS_PAGE(n)) #define FBNIC_CSR_END_PCS 0x10668 /* CSR section delimiter */ #define FBNIC_CSR_START_RSFEC 0x10800 /* CSR section delimiter */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 40947e142c5d0..9b068b82f30a0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -131,7 +131,9 @@
static irqreturn_t fbnic_mac_msix_intr(int __always_unused irq, void *data) fbn = netdev_priv(fbd->netdev); - phylink_pcs_change(&fbn->phylink_pcs, false); + /* Record link down events */ + if (!fbd->mac->get_link(fbd, fbn->aui, fbn->fec)) + phylink_pcs_change(&fbn->phylink_pcs, false); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 28a2e1fd3760a..fd01d95c53482 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -466,9 +466,8 @@ static u32 __fbnic_mac_cmd_config_asic(struct fbnic_dev *fbd, return command_config; } -static bool fbnic_mac_get_pcs_link_status(struct fbnic_dev *fbd) +static bool fbnic_mac_get_link_status(struct fbnic_dev *fbd, u8 aui, u8 fec) { - struct fbnic_net *fbn = netdev_priv(fbd->netdev); u32 pcs_status, lane_mask = ~0; pcs_status = rd32(fbd, FBNIC_SIG_PCS_OUT0); @@ -476,7 +475,7 @@ static bool fbnic_mac_get_pcs_link_status(struct fbnic_dev *fbd) return false; /* Define the expected lane mask for the status bits we need to check */ - switch (fbn->aui) { + switch (aui) { case FBNIC_AUI_100GAUI2: lane_mask = 0xf; break; @@ -484,7 +483,7 @@ static bool fbnic_mac_get_pcs_link_status(struct fbnic_dev *fbd) lane_mask = 3; break; case FBNIC_AUI_LAUI2: - switch (fbn->fec) { + switch (fec) { case FBNIC_FEC_OFF: lane_mask = 0x63; break; @@ -502,7 +501,7 @@ static bool fbnic_mac_get_pcs_link_status(struct fbnic_dev *fbd) } /* Use an XOR to remove the bits we expect to see set */ - switch (fbn->fec) { + switch (fec) { case FBNIC_FEC_OFF: lane_mask ^= FIELD_GET(FBNIC_SIG_PCS_OUT0_BLOCK_LOCK, pcs_status); @@ -521,7 +520,36 @@ static bool fbnic_mac_get_pcs_link_status(struct fbnic_dev *fbd) return !lane_mask; } -static bool fbnic_mac_get_link(struct fbnic_dev *fbd) +static bool fbnic_pmd_update_state(struct fbnic_dev *fbd, bool signal_detect) +{ + /* Delay link up for 4 seconds to allow for link training. 
+ * The state transitions for this are as follows: + * + * All states have the following two transitions in common: + * Loss of signal -> FBNIC_PMD_INITIALIZE + * The condition handled below (!signal) + * Reconfiguration -> FBNIC_PMD_INITIALIZE + * Occurs when mac_prepare starts a PHY reconfig + * FBNIC_PMD_TRAINING: + * signal still detected && 4s have passed -> Report link up + * When link is brought up in link_up -> FBNIC_PMD_SEND_DATA + * FBNIC_PMD_INITIALIZE: + * signal detected -> FBNIC_PMD_TRAINING + */ + if (!signal_detect) { + fbd->pmd_state = FBNIC_PMD_INITIALIZE; + } else if (fbd->pmd_state == FBNIC_PMD_TRAINING && + time_before(fbd->end_of_pmd_training, jiffies)) { + return true; + } else if (fbd->pmd_state == FBNIC_PMD_INITIALIZE) { + fbd->end_of_pmd_training = jiffies + 4 * HZ; + fbd->pmd_state = FBNIC_PMD_TRAINING; + } + + return fbd->pmd_state == FBNIC_PMD_SEND_DATA; +} + +static bool fbnic_mac_get_link(struct fbnic_dev *fbd, u8 aui, u8 fec) { bool link; @@ -538,7 +566,8 @@ static bool fbnic_mac_get_link(struct fbnic_dev *fbd) wr32(fbd, FBNIC_SIG_PCS_INTR_STS, FBNIC_SIG_PCS_INTR_LINK_DOWN | FBNIC_SIG_PCS_INTR_LINK_UP); - link = fbnic_mac_get_pcs_link_status(fbd); + link = fbnic_mac_get_link_status(fbd, aui, fec); + link = fbnic_pmd_update_state(fbd, link); /* Enable interrupt to only capture changes in link state */ wr32(fbd, FBNIC_SIG_PCS_INTR_MASK, @@ -586,20 +615,15 @@ void fbnic_mac_get_fw_settings(struct fbnic_dev *fbd, u8 *aui, u8 *fec) } } -static int fbnic_pcs_enable_asic(struct fbnic_dev *fbd) +static void fbnic_mac_prepare(struct fbnic_dev *fbd, u8 aui, u8 fec) { /* Mask and clear the PCS interrupt, will be enabled by link handler */ wr32(fbd, FBNIC_SIG_PCS_INTR_MASK, ~0); wr32(fbd, FBNIC_SIG_PCS_INTR_STS, ~0); - return 0; -} - -static void fbnic_pcs_disable_asic(struct fbnic_dev *fbd) -{ - /* Mask and clear the PCS interrupt */ - wr32(fbd, FBNIC_SIG_PCS_INTR_MASK, ~0); - wr32(fbd, FBNIC_SIG_PCS_INTR_STS, ~0); + /* If we don't have link 
tear it all down and start over */ + if (!fbnic_mac_get_link_status(fbd, aui, fec)) + fbd->pmd_state = FBNIC_PMD_INITIALIZE; } static void fbnic_mac_link_down_asic(struct fbnic_dev *fbd) @@ -867,10 +891,9 @@ static int fbnic_mac_get_sensor_asic(struct fbnic_dev *fbd, int id, static const struct fbnic_mac fbnic_mac_asic = { .init_regs = fbnic_mac_init_regs, - .pcs_enable = fbnic_pcs_enable_asic, - .pcs_disable = fbnic_pcs_disable_asic, .get_link = fbnic_mac_get_link, .get_link_event = fbnic_mac_get_link_event, + .prepare = fbnic_mac_prepare, .get_fec_stats = fbnic_mac_get_fec_stats, .get_pcs_stats = fbnic_mac_get_pcs_stats, .get_eth_mac_stats = fbnic_mac_get_eth_mac_stats, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h index 414c170abcba0..2b08046645f2c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h @@ -10,6 +10,23 @@ struct fbnic_dev; #define FBNIC_MAX_JUMBO_FRAME_SIZE 9742 +/* States loosely based on section 136.8.11.7.5 of IEEE 802.3-2022 Ethernet + * Standard. These are needed to track the state of the PHY as it has a delay + * of several seconds from the time link comes up until it has completed + * training that we need to wait to report the link. + * + * Currently we treat training as a single block as this is managed by the + * firmware. + * + * We have FBNIC_PMD_SEND_DATA set to 0 as the expected default at driver load + * and we initialize the structure containing it to zero at allocation. + */ +enum { + FBNIC_PMD_SEND_DATA = 0x0, + FBNIC_PMD_INITIALIZE = 0x1, + FBNIC_PMD_TRAINING = 0x2, +}; + enum { FBNIC_LINK_EVENT_NONE = 0, FBNIC_LINK_EVENT_UP = 1, @@ -55,15 +72,15 @@ enum fbnic_sensor_id { * void (*init_regs)(struct fbnic_dev *fbd); * Initialize MAC registers to enable Tx/Rx paths and FIFOs. 
* - * void (*pcs_enable)(struct fbnic_dev *fbd); - * Configure and enable PCS to enable link if not already enabled - * void (*pcs_disable)(struct fbnic_dev *fbd); - * Shutdown the link if we are the only consumer of it. - * bool (*get_link)(struct fbnic_dev *fbd); - * Check PCS link status * int (*get_link_event)(struct fbnic_dev *fbd) * Get the current link event status, reports true if link has * changed to either FBNIC_LINK_EVENT_DOWN or FBNIC_LINK_EVENT_UP + * bool (*get_link)(struct fbnic_dev *fbd, u8 aui, u8 fec); + * Check link status + * + * void (*prepare)(struct fbnic_dev *fbd, u8 aui, u8 fec); + * Prepare PHY for init by fetching settings, disabling interrupts, + * and sending an updated PHY config to FW if needed. * * void (*link_down)(struct fbnic_dev *fbd); * Configure MAC for link down event @@ -74,10 +91,10 @@ enum fbnic_sensor_id { struct fbnic_mac { void (*init_regs)(struct fbnic_dev *fbd); - int (*pcs_enable)(struct fbnic_dev *fbd); - void (*pcs_disable)(struct fbnic_dev *fbd); - bool (*get_link)(struct fbnic_dev *fbd); int (*get_link_event)(struct fbnic_dev *fbd); + bool (*get_link)(struct fbnic_dev *fbd, u8 aui, u8 fec); + + void (*prepare)(struct fbnic_dev *fbd, u8 aui, u8 fec); void (*get_fec_stats)(struct fbnic_dev *fbd, bool reset, struct fbnic_fec_stats *fec_stats); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 2d5ae89b4a154..65318a5b466e4 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -86,10 +86,10 @@ static int fbnic_stop(struct net_device *netdev) { struct fbnic_net *fbn = netdev_priv(netdev); + fbnic_mac_free_irq(fbn->fbd); phylink_suspend(fbn->phylink, fbnic_bmc_present(fbn->fbd)); fbnic_down(fbn); - fbnic_mac_free_irq(fbn->fbd); fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h 
b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index b0a87c57910f2..c2e45ff64e347 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -107,7 +107,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev, int fbnic_phylink_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam); int fbnic_phylink_init(struct net_device *netdev); - +void fbnic_phylink_pmd_training_complete_notify(struct net_device *netdev); bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu, u32 hds_threshold); #endif /* _FBNIC_NETDEV_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 4620f1847f2e0..040bd520b160e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -207,6 +207,10 @@ static void fbnic_service_task(struct work_struct *work) { struct fbnic_dev *fbd = container_of(to_delayed_work(work), struct fbnic_dev, service_task); + struct net_device *netdev = fbd->netdev; + + if (netif_running(netdev)) + fbnic_phylink_pmd_training_complete_notify(netdev); rtnl_lock(); @@ -218,13 +222,13 @@ static void fbnic_service_task(struct work_struct *work) fbnic_bmc_rpc_check(fbd); - if (netif_carrier_ok(fbd->netdev)) { - netdev_lock(fbd->netdev); - fbnic_napi_depletion_check(fbd->netdev); - netdev_unlock(fbd->netdev); + if (netif_carrier_ok(netdev)) { + netdev_lock(netdev); + fbnic_napi_depletion_check(netdev); + netdev_unlock(netdev); } - if (netif_running(fbd->netdev)) + if (netif_running(netdev)) schedule_delayed_work(&fbd->service_task, HZ); rtnl_unlock(); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c index 3c0bd435ee287..27e4073d9898b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c @@ -132,25 +132,9 @@ fbnic_phylink_pcs_get_state(struct 
phylink_pcs *pcs, unsigned int neg_mode, state->duplex = DUPLEX_FULL; - state->link = fbd->mac->get_link(fbd); -} - -static int -fbnic_phylink_pcs_enable(struct phylink_pcs *pcs) -{ - struct fbnic_net *fbn = fbnic_pcs_to_net(pcs); - struct fbnic_dev *fbd = fbn->fbd; - - return fbd->mac->pcs_enable(fbd); -} - -static void -fbnic_phylink_pcs_disable(struct phylink_pcs *pcs) -{ - struct fbnic_net *fbn = fbnic_pcs_to_net(pcs); - struct fbnic_dev *fbd = fbn->fbd; - - return fbd->mac->pcs_disable(fbd); + state->link = (fbd->pmd_state == FBNIC_PMD_SEND_DATA) && + (rd32(fbd, FBNIC_PCS(MDIO_STAT1, 0)) & + MDIO_STAT1_LSTATUS); } static int @@ -164,8 +148,6 @@ fbnic_phylink_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, static const struct phylink_pcs_ops fbnic_phylink_pcs_ops = { .pcs_config = fbnic_phylink_pcs_config, - .pcs_enable = fbnic_phylink_pcs_enable, - .pcs_disable = fbnic_phylink_pcs_disable, .pcs_get_state = fbnic_phylink_pcs_get_state, }; @@ -179,12 +161,39 @@ fbnic_phylink_mac_select_pcs(struct phylink_config *config, return &fbn->phylink_pcs; } +static int +fbnic_phylink_mac_prepare(struct phylink_config *config, unsigned int mode, + phy_interface_t iface) +{ + struct net_device *netdev = to_net_dev(config->dev); + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + fbd->mac->prepare(fbd, fbn->aui, fbn->fec); + + return 0; +} + static void fbnic_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { } +static int +fbnic_phylink_mac_finish(struct phylink_config *config, unsigned int mode, + phy_interface_t iface) +{ + struct net_device *netdev = to_net_dev(config->dev); + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + /* Retest the link state and restart interrupts */ + fbd->mac->get_link(fbd, fbn->aui, fbn->fec); + + return 0; +} + static void fbnic_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, 
phy_interface_t interface) @@ -213,7 +222,9 @@ fbnic_phylink_mac_link_up(struct phylink_config *config, static const struct phylink_mac_ops fbnic_phylink_mac_ops = { .mac_select_pcs = fbnic_phylink_mac_select_pcs, + .mac_prepare = fbnic_phylink_mac_prepare, .mac_config = fbnic_phylink_mac_config, + .mac_finish = fbnic_phylink_mac_finish, .mac_link_down = fbnic_phylink_mac_link_down, .mac_link_up = fbnic_phylink_mac_link_up, }; @@ -254,3 +265,29 @@ int fbnic_phylink_init(struct net_device *netdev) return 0; } + +/** + * fbnic_phylink_pmd_training_complete_notify - PMD training complete notifier + * @netdev: Netdev struct phylink device attached to + * + * When the link first comes up the PMD will have a period of 2 to 3 seconds + * where the link will flutter due to link training. To avoid spamming the + * kernel log with messages about this we add a delay of 4 seconds from the + * time of the last PCS report of link so that we can guarantee we are unlikely + * to see any further link loss events due to link training. + **/ +void fbnic_phylink_pmd_training_complete_notify(struct net_device *netdev) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + if (fbd->pmd_state != FBNIC_PMD_TRAINING) + return; + + if (!time_before(fbd->end_of_pmd_training, jiffies)) + return; + + fbd->pmd_state = FBNIC_PMD_SEND_DATA; + + phylink_pcs_change(&fbn->phylink_pcs, false); +} From 0d71c9f3b9fce5f0c0b0ebb71f1e0a06e31c7ea2 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:01:55 -0800 Subject: [PATCH 788/867] fbnic: Cleanup handling for link down event statistics The code for handling link down event tracking wasn't working in the existing code. Specifically we should be tracking unexpected link down events, not expected ones. To do this tracking we can use the pmd_state variable and track cases where we transition from send_data to initialize in the interrupt. 
These should be the cases where we would be seeing unexpected link down events. In addition we have cases where the PCS will reset following the training due to errors generated while the PMD was training. This will result in a PCS reset which will flap the link. To avoid counting this link flap as the NIC has yet to report link up we will only record the link event if the netif_carrier was already reported as present. In order for the stat to have any value we have to display it so this change adds logic to display it as a part of the ethtool stats. Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 9 +++++++++ drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 13 ++++++++++++- drivers/net/ethernet/meta/fbnic/fbnic_phylink.c | 2 -- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 95fac020eb93c..693ebdf387055 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -1863,6 +1863,14 @@ fbnic_get_rmon_stats(struct net_device *netdev, *ranges = fbnic_rmon_ranges; } +static void fbnic_get_link_ext_stats(struct net_device *netdev, + struct ethtool_link_ext_stats *stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + stats->link_down_events = fbn->link_down_events; +} + static const struct ethtool_ops fbnic_ethtool_ops = { .cap_link_lanes_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | @@ -1874,6 +1882,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_regs_len = fbnic_get_regs_len, .get_regs = fbnic_get_regs, .get_link = ethtool_op_get_link, + .get_link_ext_stats = fbnic_get_link_ext_stats, .get_coalesce = fbnic_get_coalesce, .set_coalesce = fbnic_set_coalesce, .get_ringparam = fbnic_get_ringparam, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c
b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 9b068b82f30a0..73dd10b7a1a82 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -122,6 +122,7 @@ static irqreturn_t fbnic_mac_msix_intr(int __always_unused irq, void *data) { struct fbnic_dev *fbd = data; struct fbnic_net *fbn; + u64 link_down_event; if (fbd->mac->get_link_event(fbd) == FBNIC_LINK_EVENT_NONE) { fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), @@ -129,11 +130,21 @@ static irqreturn_t fbnic_mac_msix_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } + /* If the link is up this would be a loss event */ + link_down_event = (fbd->pmd_state == FBNIC_PMD_SEND_DATA) ? 1 : 0; + fbn = netdev_priv(fbd->netdev); /* Record link down events */ - if (!fbd->mac->get_link(fbd, fbn->aui, fbn->fec)) + if (!fbd->mac->get_link(fbd, fbn->aui, fbn->fec)) { + /* Do not count link down events if the PCS has yet to + * acknowledge the link. This allows for the flushing out + * PCS errors generated during link training. 
+ */ + if (netif_carrier_ok(fbd->netdev)) + fbn->link_down_events += link_down_event; phylink_pcs_change(&fbn->phylink_pcs, false); + } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c index 27e4073d9898b..592e9642a4184 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c @@ -203,8 +203,6 @@ fbnic_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, struct fbnic_dev *fbd = fbn->fbd; fbd->mac->link_down(fbd); - - fbn->link_down_events++; } static void From 235bee47e1f7c6ab6acd688bab63b0697f69258a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:02:02 -0800 Subject: [PATCH 789/867] fbnic: Add SW shim for MDIO interface to PMA/PMD and PCS In order for us to support a phydev and PCS device we need to add an MDIO bus to allow the drivers to have access to the registers for the device. This change adds such an interface. The interface will consist of 2 PHYs each consisting of a PMA/PMD and a PCS located at addresses 0 and 1. There is a need for 2 PHYs due to the fact that in order to support the 2 lane modes we will need to access and configure the PCS vendor registers and RSFEC registers from the second lane identically to the first. One side effect of this is that we have to report config values for both lanes of the PHY as those registers can be poked and technically they would be valid. For now I am going to have the second lane report speeds equivalent to the given config for 2 lanes as we should be configuring both lanes identically for the 2 lane modes. The plan is in the future to extend out this interface adding RSFEC support to the PMA through a remapping of our CSRs which will essentially convert the standard c45 offsets to ones matching the setup within our device.
Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/ethernet/meta/fbnic/Makefile | 1 + drivers/net/ethernet/meta/fbnic/fbnic.h | 5 + drivers/net/ethernet/meta/fbnic/fbnic_mac.h | 1 + drivers/net/ethernet/meta/fbnic/fbnic_mdio.c | 190 +++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 3 + 5 files changed, 200 insertions(+) create mode 100644 drivers/net/ethernet/meta/fbnic/fbnic_mdio.c diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile index 15e8ff6496159..72c41af65364b 100644 --- a/drivers/net/ethernet/meta/fbnic/Makefile +++ b/drivers/net/ethernet/meta/fbnic/Makefile @@ -21,6 +21,7 @@ fbnic-y := fbnic_csr.o \ fbnic_pci.o \ fbnic_phylink.o \ fbnic_rpc.o \ + fbnic_mdio.o \ fbnic_time.o \ fbnic_tlv.o \ fbnic_txrx.o \ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index fac1283d0ade6..779a083b92159 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -95,6 +95,9 @@ struct fbnic_dev { u64 prev_firmware_time; struct fbnic_fw_log fw_log; + + /* MDIO bus for PHYs */ + struct mii_bus *mdio_bus; }; /* Reserve entry 0 in the MSI-X "others" array until we have filled all @@ -204,6 +207,8 @@ void fbnic_dbg_exit(void); void fbnic_rpc_reset_valid_entries(struct fbnic_dev *fbd); +int fbnic_mdiobus_create(struct fbnic_dev *fbd); + void fbnic_csr_get_regs(struct fbnic_dev *fbd, u32 *data, u32 *regs_version); int fbnic_csr_regs_len(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h index 2b08046645f2c..2a9440df5e1d1 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h @@ -55,6 +55,7 @@ enum { FBNIC_AUI_50GAUI1 = 2, /* 53.125GBd 53.125 * 1 */ FBNIC_AUI_100GAUI2 = 3, /* 106.25GBd 53.125 * 2 */ FBNIC_AUI_UNKNOWN = 4, + __FBNIC_AUI_MAX__ }; #define FBNIC_AUI_MODE_R2 (FBNIC_AUI_LAUI2) 
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mdio.c b/drivers/net/ethernet/meta/fbnic/fbnic_mdio.c new file mode 100644 index 0000000000000..7eeaeb03529b6 --- /dev/null +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mdio.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#include +#include + +#include "fbnic.h" +#include "fbnic_netdev.h" + +#define DW_VENDOR BIT(15) +#define FBNIC_PCS_VENDOR BIT(9) +#define FBNIC_PCS_ZERO_MASK (DW_VENDOR - FBNIC_PCS_VENDOR) + +static int +fbnic_mdio_read_pmapmd(struct fbnic_dev *fbd, int addr, int regnum) +{ + u16 ctrl1[__FBNIC_AUI_MAX__][2] = { + { MDIO_PMA_CTRL1_SPEED25G, MDIO_PMA_CTRL1_SPEED50G }, + { MDIO_PMA_CTRL1_SPEED50G, MDIO_PMA_CTRL1_SPEED50G }, + { MDIO_PMA_CTRL1_SPEED50G, MDIO_PMA_CTRL1_SPEED100G }, + { MDIO_PMA_CTRL1_SPEED100G, MDIO_PMA_CTRL1_SPEED100G }, + { 0, 0 }}; + u8 aui = FBNIC_AUI_UNKNOWN; + struct fbnic_net *fbn; + int ret = 0; + + if (fbd->netdev) { + fbn = netdev_priv(fbd->netdev); + if (fbn->aui < FBNIC_AUI_UNKNOWN) + aui = fbn->aui; + } + + switch (regnum) { + case MDIO_CTRL1: + ret = ctrl1[aui][addr & 1]; + break; + case MDIO_STAT1: + ret = (fbd->pmd_state == FBNIC_PMD_SEND_DATA) ? 
+ MDIO_STAT1_LSTATUS : 0; + break; + case MDIO_DEVID1: + ret = MP_FBNIC_XPCS_PMA_100G_ID >> 16; + break; + case MDIO_DEVID2: + ret = MP_FBNIC_XPCS_PMA_100G_ID & 0xffff; + break; + case MDIO_DEVS1: + ret = MDIO_DEVS_PMAPMD | MDIO_DEVS_PCS; + break; + case MDIO_STAT2: + ret = MDIO_STAT2_DEVPRST_VAL; + break; + default: + break; + } + + dev_dbg(fbd->dev, + "SWMII PMAPMD Rd: Addr: %d RegNum: %d Value: 0x%04x\n", + addr, regnum, ret); + + return ret; +} + +static int +fbnic_mdio_read_pcs(struct fbnic_dev *fbd, int addr, int regnum) +{ + int ret; + + /* Report 0 for reserved registers */ + if (regnum & FBNIC_PCS_ZERO_MASK) + return 0; + + /* Intercept and return correct ID for PCS */ + if (regnum == MDIO_DEVID1) + return DW_XPCS_ID >> 16; + if (regnum == MDIO_DEVID2) + return DW_XPCS_ID & 0xffff; + if (regnum == MDIO_DEVS1) + return MDIO_DEVS_PMAPMD | MDIO_DEVS_PCS; + + /* Swap vendor page bit for FBNIC PCS vendor page bit */ + if (regnum & DW_VENDOR) + regnum ^= DW_VENDOR | FBNIC_PCS_VENDOR; + + ret = fbnic_rd32(fbd, FBNIC_PCS_PAGE(addr) + regnum); + + dev_dbg(fbd->dev, + "SWMII PCS Rd: Addr: %d RegNum: %d Value: 0x%04x\n", + addr, regnum, ret); + + return ret; +} + +static int +fbnic_mdio_read_c45(struct mii_bus *bus, int addr, int devnum, int regnum) +{ + struct fbnic_dev *fbd = bus->priv; + + if (addr & ~1) + return 0; + + if (devnum == MDIO_MMD_PMAPMD) + return fbnic_mdio_read_pmapmd(fbd, addr, regnum); + + if (devnum == MDIO_MMD_PCS) + return fbnic_mdio_read_pcs(fbd, addr, regnum); + + return 0; +} + +static void +fbnic_mdio_write_pmapmd(struct fbnic_dev *fbd, int addr, int regnum, u16 val) +{ + dev_dbg(fbd->dev, + "SWMII PMAPMD Wr: Addr: %d RegNum: %d Value: 0x%04x\n", + addr, regnum, val); +} + +static void +fbnic_mdio_write_pcs(struct fbnic_dev *fbd, int addr, int regnum, u16 val) +{ + /* Skip write for reserved registers */ + if (regnum & FBNIC_PCS_ZERO_MASK) + return; + + /* Swap vendor page bit for FBNIC PCS vendor page bit */ + if (regnum & DW_VENDOR) + 
regnum ^= DW_VENDOR | FBNIC_PCS_VENDOR; + + fbnic_wr32(fbd, FBNIC_PCS_PAGE(addr) + regnum, val); + + dev_dbg(fbd->dev, + "SWMII PCS Wr: Addr: %d RegNum: %d Value: 0x%04x\n", + addr, regnum, val); +} + +static int +fbnic_mdio_write_c45(struct mii_bus *bus, int addr, int devnum, + int regnum, u16 val) +{ + struct fbnic_dev *fbd = bus->priv; + + if (addr & ~1) + return 0; + + if (devnum == MDIO_MMD_PMAPMD) + fbnic_mdio_write_pmapmd(fbd, addr, regnum, val); + + if (devnum == MDIO_MMD_PCS) + fbnic_mdio_write_pcs(fbd, addr, regnum, val); + + return 0; +} + +/** + * fbnic_mdiobus_create - Create an MDIO bus to allow interfacing w/ PHYs + * @fbd: Pointer to FBNIC device structure to populate bus on + * + * Initialize an MDIO bus and place a pointer to it on the fbd struct. This bus + * will be used to interface with the PMA/PMD and PCS. + * + * Return: 0 on success, negative on failure + **/ +int fbnic_mdiobus_create(struct fbnic_dev *fbd) +{ + struct mii_bus *bus; + int err; + + bus = devm_mdiobus_alloc(fbd->dev); + if (!bus) + return -ENOMEM; + + bus->name = "fbnic_mii_bus"; + bus->read_c45 = &fbnic_mdio_read_c45; + bus->write_c45 = &fbnic_mdio_write_c45; + bus->parent = fbd->dev; + bus->phy_mask = GENMASK(31, 2); + bus->priv = fbd; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(fbd->dev)); + + err = devm_mdiobus_register(fbd->dev, bus); + if (err) { + dev_err(fbd->dev, "Failed to create MDIO bus: %d\n", err); + return err; + } + + fbd->mdio_bus = bus; + + return 0; +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 040bd520b160e..7991e28700813 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -339,6 +339,9 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto init_failure_mode; } + if (fbnic_mdiobus_create(fbd)) + goto init_failure_mode; + netdev = fbnic_netdev_alloc(fbd); if (!netdev) { dev_err(&pdev->dev,
"Netdev allocation failed\n"); From e3496afdeac27b37077a7c64ebe13e21aaf99e70 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 10 Nov 2025 08:02:09 -0800 Subject: [PATCH 790/867] fbnic: Replace use of internal PCS w/ Designware XPCS As we have exposed the PCS registers via the SWMII we can now start looking at connecting the XPCS driver to those registers and let it manage the PCS instead of us doing it directly from the fbnic driver. For now this just gets us the ability to detect link. The hope is in the future to add some of the vendor specific registers to begin enabling XPCS configuration of the interface. Signed-off-by: Alexander Duyck Signed-off-by: NipaLocal --- drivers/net/ethernet/meta/Kconfig | 1 + drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 2 +- .../net/ethernet/meta/fbnic/fbnic_netdev.c | 7 +- .../net/ethernet/meta/fbnic/fbnic_netdev.h | 4 +- .../net/ethernet/meta/fbnic/fbnic_phylink.c | 104 ++++++++---------- 5 files changed, 55 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index dff51f23d295e..ca5c7ac2a5bc2 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -26,6 +26,7 @@ config FBNIC depends on PTP_1588_CLOCK_OPTIONAL select NET_DEVLINK select PAGE_POOL + select PCS_XPCS select PHYLINK select PLDMFW help diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 73dd10b7a1a82..f2ccb33fa67ae 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -143,7 +143,7 @@ static irqreturn_t fbnic_mac_msix_intr(int __always_unused irq, void *data) */ if (netif_carrier_ok(fbd->netdev)) fbn->link_down_events += link_down_event; - phylink_pcs_change(&fbn->phylink_pcs, false); + phylink_pcs_change(fbn->pcs, false); } return IRQ_HANDLED; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index
65318a5b466e4..81c9d5c9a4b2c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -697,10 +697,7 @@ void fbnic_reset_queues(struct fbnic_net *fbn, **/ void fbnic_netdev_free(struct fbnic_dev *fbd) { - struct fbnic_net *fbn = netdev_priv(fbd->netdev); - - if (fbn->phylink) - phylink_destroy(fbn->phylink); + fbnic_phylink_destroy(fbd->netdev); free_netdev(fbd->netdev); fbd->netdev = NULL; @@ -802,7 +799,7 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netif_tx_stop_all_queues(netdev); - if (fbnic_phylink_init(netdev)) { + if (fbnic_phylink_create(netdev)) { fbnic_netdev_free(fbd); return NULL; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index c2e45ff64e347..54a8bf172fa61 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -44,7 +44,7 @@ struct fbnic_net { struct phylink *phylink; struct phylink_config phylink_config; - struct phylink_pcs phylink_pcs; + struct phylink_pcs *pcs; u8 aui; u8 fec; @@ -106,6 +106,8 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev, struct ethtool_link_ksettings *cmd); int fbnic_phylink_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam); +int fbnic_phylink_create(struct net_device *netdev); +void fbnic_phylink_destroy(struct net_device *netdev); int fbnic_phylink_init(struct net_device *netdev); void fbnic_phylink_pmd_training_complete_notify(struct net_device *netdev); bool fbnic_check_split_frames(struct bpf_prog *prog, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c index 592e9642a4184..188155f43416e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) Meta Platforms, Inc. and affiliates. 
*/ +#include #include #include @@ -101,56 +102,6 @@ int fbnic_phylink_get_fecparam(struct net_device *netdev, return 0; } -static struct fbnic_net * -fbnic_pcs_to_net(struct phylink_pcs *pcs) -{ - return container_of(pcs, struct fbnic_net, phylink_pcs); -} - -static void -fbnic_phylink_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, - struct phylink_link_state *state) -{ - struct fbnic_net *fbn = fbnic_pcs_to_net(pcs); - struct fbnic_dev *fbd = fbn->fbd; - - switch (fbn->aui) { - case FBNIC_AUI_25GAUI: - state->speed = SPEED_25000; - break; - case FBNIC_AUI_LAUI2: - case FBNIC_AUI_50GAUI1: - state->speed = SPEED_50000; - break; - case FBNIC_AUI_100GAUI2: - state->speed = SPEED_100000; - break; - default: - state->link = 0; - return; - } - - state->duplex = DUPLEX_FULL; - - state->link = (fbd->pmd_state == FBNIC_PMD_SEND_DATA) && - (rd32(fbd, FBNIC_PCS(MDIO_STAT1, 0)) & - MDIO_STAT1_LSTATUS); -} - -static int -fbnic_phylink_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, - phy_interface_t interface, - const unsigned long *advertising, - bool permit_pause_to_mac) -{ - return 0; -} - -static const struct phylink_pcs_ops fbnic_phylink_pcs_ops = { - .pcs_config = fbnic_phylink_pcs_config, - .pcs_get_state = fbnic_phylink_pcs_get_state, -}; - static struct phylink_pcs * fbnic_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) @@ -158,7 +109,7 @@ fbnic_phylink_mac_select_pcs(struct phylink_config *config, struct net_device *netdev = to_net_dev(config->dev); struct fbnic_net *fbn = netdev_priv(netdev); - return &fbn->phylink_pcs; + return fbn->pcs; } static int @@ -227,13 +178,33 @@ static const struct phylink_mac_ops fbnic_phylink_mac_ops = { .mac_link_up = fbnic_phylink_mac_link_up, }; -int fbnic_phylink_init(struct net_device *netdev) +/** + * fbnic_phylink_create - Phylink device creation + * @netdev: Network Device struct to attach phylink device + * + * Initialize and attach a phylink instance to the device. 
The phylink + * device will make use of the netdev struct to track carrier and will + * eventually be used to expose the current state of the MAC and PCS + * setup. + * + * Return: 0 on success, negative on failure + **/ +int fbnic_phylink_create(struct net_device *netdev) { struct fbnic_net *fbn = netdev_priv(netdev); struct fbnic_dev *fbd = fbn->fbd; + struct phylink_pcs *pcs; struct phylink *phylink; + int err; + + pcs = xpcs_create_pcs_mdiodev(fbd->mdio_bus, 0); + if (IS_ERR(pcs)) { + err = PTR_ERR(pcs); + dev_err(fbd->dev, "Failed to create PCS device: %d\n", err); + return err; + } - fbn->phylink_pcs.ops = &fbnic_phylink_pcs_ops; + fbn->pcs = pcs; fbn->phylink_config.dev = &netdev->dev; fbn->phylink_config.type = PHYLINK_NETDEV; @@ -256,14 +227,35 @@ int fbnic_phylink_init(struct net_device *netdev) phylink = phylink_create(&fbn->phylink_config, NULL, fbnic_phylink_select_interface(fbn->aui), &fbnic_phylink_mac_ops); - if (IS_ERR(phylink)) - return PTR_ERR(phylink); + if (IS_ERR(phylink)) { + err = PTR_ERR(phylink); + dev_err(netdev->dev.parent, + "Failed to create Phylink interface, err: %d\n", err); + xpcs_destroy_pcs(pcs); + return err; + } fbn->phylink = phylink; return 0; } +/** + * fbnic_phylink_destroy - Teardown phylink related interfaces + * @netdev: Network Device struct containing phylink device + * + * Detach and free resources related to phylink interface. 
+ **/ +void fbnic_phylink_destroy(struct net_device *netdev) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + if (fbn->phylink) + phylink_destroy(fbn->phylink); + if (fbn->pcs) + xpcs_destroy_pcs(fbn->pcs); +} + /** * fbnic_phylink_pmd_training_complete_notify - PMD training complete notifier * @netdev: Netdev struct phylink device attached to @@ -287,5 +279,5 @@ void fbnic_phylink_pmd_training_complete_notify(struct net_device *netdev) fbd->pmd_state = FBNIC_PMD_SEND_DATA; - phylink_pcs_change(&fbn->phylink_pcs, false); + phylink_pcs_change(fbn->pcs, false); } From 7af9a1c9b65e0e89614becb211f269134f02cf89 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:40 +0100 Subject: [PATCH 791/867] selftests: mptcp: connect: fix fallback note due to OoO The "fallback due to TCP OoO" was never printed because the stat_ooo_now variable was checked twice: once in the parent if-statement, and once in the child one. The second condition was then always true, and the 'else' branch was never taken. The idea is that when there are more ACK + MP_CAPABLE than expected, the test either fails if there were no out-of-order packets, or a notice is printed.
Fixes: 69ca3d29a755 ("mptcp: update selftest for fallback due to OoO") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 47ecb5b3836eb..9b7b93f8eb0c3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -492,7 +492,7 @@ do_transfer() "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then if [ ${stat_ooo_now} -eq 0 ]; then mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ "than expected (${expect_ackrx})" From c9d97da78e96b93b4a3aa7a284d782937309886e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:41 +0100 Subject: [PATCH 792/867] selftests: mptcp: join: rm: set backup flag Some of these 'remove' tests rarely fail because a subflow has been reset instead of cleanly removed. This can happen when one extra subflow which has never carried data is being closed (FIN) on one side, while the other is sending data for the first time. To prevent such subflows from being used right at the end, the backup flag has been added. With that, data will only be carried on the initial subflow.
Fixes: d2c4333a801c ("selftests: mptcp: add testcases for removing addrs") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- .../testing/selftests/net/mptcp/mptcp_join.sh | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4faf58fecc94f..2c491163fc275 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2547,7 +2547,7 @@ remove_tests() if reset "remove single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 @@ -2560,8 +2560,8 @@ remove_tests() if reset "remove multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-2 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2572,7 +2572,7 @@ remove_tests() # single address, remove if reset "remove single address"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 1 addr_nr_ns1=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2585,9 +2585,9 @@ remove_tests() # subflow and signal, remove if reset "remove subflow and signal"; then pm_nl_set_limits $ns1 0 2 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 2 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns1=-1 
addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2599,10 +2599,10 @@ remove_tests() # subflows and signal, remove if reset "remove subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2614,9 +2614,9 @@ remove_tests() # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2629,10 +2629,10 @@ remove_tests() # invalid addresses remove if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup # broadcast IP: no packet for this address will be received on ns1 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2646,10 +2646,10 @@ remove_tests() # subflows and signal, flush if reset "flush subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup 
pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2662,9 +2662,9 @@ remove_tests() if reset "flush subflows"; then pm_nl_set_limits $ns1 3 3 pm_nl_set_limits $ns2 3 3 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2681,9 +2681,9 @@ remove_tests() # addresses flush if reset "flush addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2696,9 +2696,9 @@ remove_tests() # invalid addresses flush if reset "flush invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 From 1aa81ed22e7ebd148ab6e836622973c0f814e3e4 Mon Sep 17 00:00:00 2001 
From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:42 +0100 Subject: [PATCH 793/867] selftests: mptcp: join: endpoints: longer transfer In rare cases, when the test environment is very slow, some endpoint tests can fail because some expected events have not been seen. Because the tests are expecting a long on-going connection, and they are not waiting for the end of the transfer, it is fine to make the connection longer. This connection will be killed at the end, after the verifications, so making it longer doesn't change anything, apart from preventing it from ending before the end of the verifications. To play it safe, all endpoint tests not waiting for the end of the transfer are now sharing a longer file (128KB) at slow speed. Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Cc: stable@vger.kernel.org Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Fixes: b5e2fb832f48 ("selftests: mptcp: add explicit test case for remove/readd") Fixes: e06959e9eebd ("selftests: mptcp: join: test for flush/re-add endpoints") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2c491163fc275..7c121d5609143 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -4089,7 +4089,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - { speed=slow \ + { test_linkfail=128 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$!
@@ -4116,7 +4116,7 @@ endpoint_tests() pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4194,7 +4194,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4267,7 +4267,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - { test_linkfail=4 speed=20 \ + { test_linkfail=128 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! From 5461e7c4e58cb4d22c1384bc052f4d159cf3ddea Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:43 +0100 Subject: [PATCH 794/867] selftests: mptcp: join: userspace: longer transfer In rare cases, when the test environment is very slow, some userspace tests can fail because some expected events have not been seen. Because the tests are expecting a long on-going connection, and they are not waiting for the end of the transfer, it is fine to make the connection longer. This connection will be killed at the end, after the verifications, so making it longer doesn't change anything, apart from preventing it from ending before the end of the verifications. To play it safe, all userspace tests not waiting for the end of the transfer are now sharing a longer file (128KB) at slow speed.
Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7c121d5609143..553c5a4127f0d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3952,7 +3952,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3985,7 +3985,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -4013,7 +4013,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -4034,7 +4034,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! 
wait_mpj $ns2 @@ -4058,7 +4058,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 From 243a60056baf919ff3d5ba3740c36fdb6454ee65 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:44 +0100 Subject: [PATCH 795/867] selftests: mptcp: connect: trunc: read all recv data MPTCP Join "fastclose server" selftest is sometimes failing because the client output file doesn't have the expected size, e.g. 296B instead of 1024B. When looking at a packet trace when this happens, the server sent the expected 1024B in two parts -- 100B, then 924B -- then the MP_FASTCLOSE. It is then strange to see the client only receiving 296B, which would mean it only got a part of the second packet. The problem is then not on the networking side, but rather on the data reception side. When mptcp_connect is launched with '-f -1', it means the connection might stop before having sent everything, because a reset has been received. When this happens, the program was directly stopped. But it is also possible there are still some data to read, simply because the previous 'read' step was done with a buffer smaller than the pending data, see do_rnd_read(). In this case, it is important to read what's left in the kernel buffers before stopping without error like before. SIGPIPE is now ignored, not to quit the app before having read everything. 
Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- .../selftests/net/mptcp/mptcp_connect.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index c030b08a71957..404a77bf366a8 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -710,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { - if (cfg_rcv_trunc) - return 0; + /* expected reset, continue to read */ + if (cfg_rcv_trunc && + (errno == ECONNRESET || + errno == EPIPE)) { + fds.events &= ~POLLOUT; + continue; + } + perror("write"); return 111; } @@ -737,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } if (fds.revents & (POLLERR | POLLNVAL)) { - if (cfg_rcv_trunc) - return 0; + if (cfg_rcv_trunc) { + fds.events &= ~(POLLERR | POLLNVAL); + continue; + } fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; @@ -1441,7 +1449,7 @@ static void parse_opts(int argc, char **argv) */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; - signal(SIGPIPE, handle_signal); + signal(SIGPIPE, SIG_IGN); } break; case 'j': From 1e911ac72c92b7ad14c2bf16f8880f54219ab64f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:45 +0100 Subject: [PATCH 796/867] selftests: mptcp: join: properly kill background tasks The 'run_tests' function is executed in the background, but killing its associated PID would not kill the children tasks running in the background. To properly kill all background tasks, 'kill -- -PID' could be used, but this requires kill from procps-ng. 
Instead, all children tasks are listed using 'ps', and 'kill' is called with all PIDs of this group. Fixes: 31ee4ad86afd ("selftests: mptcp: join: stop transfer when check is done (part 1)") Cc: stable@vger.kernel.org Fixes: 04b57c9e096a ("selftests: mptcp: join: stop transfer when check is done (part 2)") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- .../testing/selftests/net/mptcp/mptcp_join.sh | 18 ++++++++-------- .../testing/selftests/net/mptcp/mptcp_lib.sh | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 553c5a4127f0d..f0efbf9856259 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3977,7 +3977,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create destroy subflow @@ -4005,7 +4005,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create id 0 subflow @@ -4026,7 +4026,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm remove initial subflow @@ -4050,7 +4050,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -4076,7 +4076,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi } @@ -4106,7 +4106,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 
10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && @@ -4161,7 +4161,7 @@ endpoint_tests() chk_mptcp_info subflows 3 subflows 3 done - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4235,7 +4235,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4283,7 +4283,7 @@ endpoint_tests() wait_mpj $ns2 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid join_syn_tx=3 join_connect_err=1 \ chk_join_nr 2 2 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index d62e653d48b0f..f4388900016ab 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -350,6 +350,27 @@ mptcp_lib_kill_wait() { wait "${1}" 2>/dev/null } +# $1: PID +mptcp_lib_pid_list_children() { + local curr="${1}" + # evoke 'ps' only once + local pids="${2:-"$(ps o pid,ppid)"}" + + echo "${curr}" + + local pid + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do + mptcp_lib_pid_list_children "${pid}" "${pids}" + done +} + +# $1: PID +mptcp_lib_kill_group_wait() { + # Some users might not have procps-ng: cannot use "kill -- -PID" + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null + wait "${1}" 2>/dev/null +} + # $1: IP address mptcp_lib_is_v6() { [ -z "${1##*:*}" ] From 6461467628cc5fbbd705bfe64a3ea38e2c49b7d8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 10 Nov 2025 13:55:34 -0700 
Subject: [PATCH 797/867] net: netcp: ethss: Fix type of first parameter in hwtstamp stubs When building without CONFIG_TI_CPTS, there are a series of errors from -Wincompatible-pointer-types: drivers/net/ethernet/ti/netcp_ethss.c:3831:27: error: initialization of 'int (*)(void *, struct kernel_hwtstamp_config *)' from incompatible pointer type 'int (*)(struct gbe_intf *, struct kernel_hwtstamp_config *)' [-Wincompatible-pointer-types] 3831 | .hwtstamp_get = gbe_hwtstamp_get, | ^~~~~~~~~~~~~~~~ drivers/net/ethernet/ti/netcp_ethss.c:3831:27: note: (near initialization for 'gbe_module.hwtstamp_get') drivers/net/ethernet/ti/netcp_ethss.c:2758:19: note: 'gbe_hwtstamp_get' declared here 2758 | static inline int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, | ^~~~~~~~~~~~~~~~ drivers/net/ethernet/ti/netcp_ethss.c:3832:27: error: initialization of 'int (*)(void *, struct kernel_hwtstamp_config *, struct netlink_ext_ack *)' from incompatible pointer type 'int (*)(struct gbe_intf *, struct kernel_hwtstamp_config *, struct netlink_ext_ack *)' [-Wincompatible-pointer-types] 3832 | .hwtstamp_set = gbe_hwtstamp_set, | ^~~~~~~~~~~~~~~~ drivers/net/ethernet/ti/netcp_ethss.c:3832:27: note: (near initialization for 'gbe_module.hwtstamp_set') drivers/net/ethernet/ti/netcp_ethss.c:2764:19: note: 'gbe_hwtstamp_set' declared here 2764 | static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, | ^~~~~~~~~~~~~~~~ In a recent conversion to ndo_hwtstamp, the type of the first parameter was updated for the CONFIG_TI_CPTS=y implementations of gbe_hwtstamp_get() and gbe_hwtstamp_set() but not the CONFIG_TI_CPTS=n ones. Update the type of the first parameter in the CONFIG_TI_CPTS=n stubs to resolve the errors. 
Fixes: 3f02b8272557 ("ti: netcp: convert to ndo_hwtstamp callbacks") Reviewed-by: Vadim Fedorenko Signed-off-by: Nathan Chancellor Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/netcp_ethss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 0ae44112812cf..4f6cc6cd1f030 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2755,13 +2755,13 @@ static inline void gbe_unregister_cpts(struct gbe_priv *gbe_dev) { } -static inline int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, +static inline int gbe_hwtstamp_get(void *intf_priv, struct kernel_hwtstamp_config *cfg) { return -EOPNOTSUPP; } -static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, +static inline int gbe_hwtstamp_set(void *intf_priv, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { From ab65428e2749a49b8d51be08cd3cdb527d53aa95 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 10 Nov 2025 22:20:26 +0100 Subject: [PATCH 798/867] net: phy: fixed_phy: initialize the link status as up All callers initialize the link status as up. This change is in line with how of_phy_register_fixed_link() behaves. 
Signed-off-by: Heiner Kallweit Signed-off-by: NipaLocal --- drivers/net/phy/fixed_phy.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 9bd6937411e43..715f0356f895f 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -174,13 +174,11 @@ struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, } /* propagate the fixed link values to struct phy_device */ - phy->link = status->link; - if (status->link) { - phy->speed = status->speed; - phy->duplex = status->duplex; - phy->pause = status->pause; - phy->asym_pause = status->asym_pause; - } + phy->link = 1; + phy->speed = status->speed; + phy->duplex = status->duplex; + phy->pause = status->pause; + phy->asym_pause = status->asym_pause; of_node_get(np); phy->mdio.dev.of_node = np; @@ -224,7 +222,6 @@ EXPORT_SYMBOL_GPL(fixed_phy_register); struct phy_device *fixed_phy_register_100fd(void) { static const struct fixed_phy_status status = { - .link = true, .speed = SPEED_100, .duplex = DUPLEX_FULL, }; From bdff92372d767aa6dca124954914bbdc83cecd8f Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 11 Nov 2025 00:04:36 +0100 Subject: [PATCH 799/867] ipv6: clear RA flags when adding a static route When an IPv6 Router Advertisement (RA) is received for a prefix, the kernel creates the corresponding on-link route with flags RTF_ADDRCONF and RTF_PREFIX_RT configured and RTF_EXPIRES if lifetime is set. If later a user configures a static IPv6 address on the same prefix the kernel clears the RTF_EXPIRES flag but it doesn't clear the RTF_ADDRCONF and RTF_PREFIX_RT. When the next RA for that prefix is received, the kernel sees the route as RA-learned and wrongly configures back the lifetime. This is problematic because if the route expires, the static address won't have the corresponding on-link route. 
This fix clears the RTF_ADDRCONF and RTF_PREFIX_RT flags, preventing the lifetime from being configured when the next RA arrives. If the static address is deleted, the route becomes RA-learned again. Fixes: 14ef37b6d00e ("ipv6: fix route lookup in addrconf_prefix_rcv()") Reported-by: Garri Djavadyan Closes: https://lore.kernel.org/netdev/ba807d39aca5b4dcf395cc11dca61a130a52cfd3.camel@gmail.com/ Signed-off-by: Fernando Fernandez Mancera Signed-off-by: NipaLocal --- net/ipv6/ip6_fib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 02c16909f6182..2111af022d946 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1138,6 +1138,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); } + if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT))) { + iter->fib6_flags &= ~RTF_ADDRCONF; + iter->fib6_flags &= ~RTF_PREFIX_RT; + } if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, From 8d7eb1a3f535084bcefd900cd9454e6dba1eec08 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:50 +0800 Subject: [PATCH 800/867] mptcp: disallow MPTCP subflows from sockmap The sockmap feature allows bpf syscall from userspace, or based on bpf sockops, replacing the sk_prot of sockets during protocol stack processing with sockmap's custom read/write interfaces. ''' tcp_rcv_state_process() subflow_syn_recv_sock() tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) bpf_skops_established <== sockops bpf_sock_map_update(sk) <== call bpf helper tcp_bpf_update_proto() <== update sk_prot ''' Consider two scenarios: 1. When the server has MPTCP enabled and the client also requests MPTCP, the sk passed to the BPF program is a subflow sk. Since subflows only handle partial data, replacing their sk_prot is meaningless and will cause traffic disruption. 2. 
When the server has MPTCP enabled but the client sends a TCP SYN without MPTCP, subflow_syn_recv_sock() performs a fallback on the subflow, replacing the subflow sk's sk_prot with the native sk_prot. ''' subflow_ulp_fallback() subflow_drop_ctx() mptcp_subflow_ops_undo_override() ''' Subsequently, accept::mptcp_stream_accept::mptcp_fallback_tcp_ops() converts the subflow to plain TCP. For the first case, we should prevent it from being combined with sockmap by setting sk_prot->psock_update_sk_prot to NULL, which will be blocked by sockmap's own flow. For the second case, since subflow_syn_recv_sock() has already restored sk_prot to native tcp_prot/tcpv6_prot, no further action is needed. Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections") Cc: Signed-off-by: Jiayuan Chen Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- net/mptcp/subflow.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 30961b3d17028..ddd0fc6fcf453 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void) tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; tcp_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcp_prot_override.psock_update_sk_prot = NULL; +#endif #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock @@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void) tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; tcpv6_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcpv6_prot_override.psock_update_sk_prot = NULL; +#endif #endif mptcp_diag_subflow_init(&subflow_ulp_ops); From 6be1486fb7b39e182737b0a8cee2a3751aa8992c Mon Sep 17 00:00:00 2001 From: 
Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:51 +0800 Subject: [PATCH 801/867] net,mptcp: fix proto fallback detection with BPF The sockmap feature allows bpf syscall from userspace, or based on bpf sockops, replacing the sk_prot of sockets during protocol stack processing with sockmap's custom read/write interfaces. ''' tcp_rcv_state_process() syn_recv_sock()/subflow_syn_recv_sock() tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) bpf_skops_established <== sockops bpf_sock_map_update(sk) <== call bpf helper tcp_bpf_update_proto() <== update sk_prot ''' When the server has MPTCP enabled but the client sends a TCP SYN without MPTCP, subflow_syn_recv_sock() performs a fallback on the subflow, replacing the subflow sk's sk_prot with the native sk_prot. ''' subflow_syn_recv_sock() subflow_ulp_fallback() subflow_drop_ctx() mptcp_subflow_ops_undo_override() ''' Then, this subflow can be normally used by sockmap, which replaces the native sk_prot with sockmap's custom sk_prot. The issue occurs when the user executes accept::mptcp_stream_accept::mptcp_fallback_tcp_ops(). Here, it uses sk->sk_prot to compare with the native sk_prot, but this is incorrect when sockmap is used, as we may incorrectly set sk->sk_socket->ops. This fix uses the more generic sk_family for the comparison instead. Additionally, this also prevents a WARNING from occurring: result from ./scripts/decode_stacktrace.sh: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 337 at net/mptcp/protocol.c:68 mptcp_stream_accept \ (net/mptcp/protocol.c:4005) Modules linked in: ... 
PKRU: 55555554 Call Trace: do_accept (net/socket.c:1989) __sys_accept4 (net/socket.c:2028 net/socket.c:2057) __x64_sys_accept (net/socket.c:2067) x64_sys_call (arch/x86/entry/syscall_64.c:41) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) RIP: 0033:0x7f87ac92b83d ---[ end trace 0000000000000000 ]--- Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections") Cc: Signed-off-by: Jiayuan Chen Reviewed-by: Jakub Sitnicki Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- net/mptcp/protocol.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4cd5df01446e3..b5e5e130b158b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { + unsigned short family = READ_ONCE(sk->sk_family); + #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (sk->sk_prot == &tcpv6_prot) + if (family == AF_INET6) return &inet6_stream_ops; #endif - WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + WARN_ON_ONCE(family != AF_INET); return &inet_stream_ops; } From 5bec022c18342720bcdf0420c637f500ca2bef09 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:52 +0800 Subject: [PATCH 802/867] selftests/bpf: Add mptcp test with sockmap Add test cases to verify that when MPTCP falls back to plain TCP sockets, they can properly work with sockmap. Additionally, add test cases to ensure that sockmap correctly rejects MPTCP sockets as expected. 
Signed-off-by: Jiayuan Chen Acked-by: Matthieu Baerts (NGI0) Signed-off-by: NipaLocal --- .../testing/selftests/bpf/prog_tests/mptcp.c | 141 ++++++++++++++++++ .../selftests/bpf/progs/mptcp_sockmap.c | 43 ++++++ 2 files changed, 184 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_sockmap.c diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd20..b976fe6263434 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,11 +6,14 @@ #include #include #include +#include #include "cgroup_helpers.h" #include "network_helpers.h" +#include "socket_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_sockmap.skel.h" #define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -436,6 +439,142 @@ static void test_subflow(void) close(cgroup_fd); } +/* Test sockmap on MPTCP server handling non-mp-capable clients. 
*/ +static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, client_fd1 = -1, client_fd2 = -1; + int server_fd1 = -1, server_fd2 = -1, sent, recvd; + char snd[9] = "123456789"; + char rcv[10]; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client without MPTCP enabled */ + client_fd1 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd1 = xaccept_nonblock(listen_fd, NULL, NULL); + skel->bss->sk_index = 1; + client_fd2 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd2 = xaccept_nonblock(listen_fd, NULL, NULL); + /* test normal redirect behavior: data sent by client_fd1 can be + * received by client_fd2 + */ + skel->bss->redirect_idx = 1; + sent = xsend(client_fd1, snd, sizeof(snd), 0); + if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:xsend(client_fd1)")) + goto end; + + /* try to recv more bytes to avoid truncation check */ + recvd = recv_timeout(client_fd2, rcv, sizeof(rcv), MSG_DONTWAIT, 2); + if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)")) + goto end; + +end: + if (client_fd1 >= 0) + close(client_fd1); + if (client_fd2 >= 0) + close(client_fd2); + if (server_fd1 >= 0) + close(server_fd1); + if (server_fd2 >= 0) + close(server_fd2); + close(listen_fd); +} + +/* Test sockmap rejection of MPTCP sockets - both server and client sides. 
*/ +static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, server_fd = -1, client_fd1 = -1; + int err, zero = 0; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client with MPTCP enabled */ + client_fd1 = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1")) + goto end; + + /* bpf_sock_map_update() called from sockops should reject MPTCP sk */ + if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject")) + goto end; + + server_fd = xaccept_nonblock(listen_fd, NULL, NULL); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &server_fd, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + goto end; + + /* MPTCP client should also be disallowed */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + goto end; +end: + if (client_fd1 >= 0) + close(client_fd1); + if (server_fd >= 0) + close(server_fd); + close(listen_fd); +} + +static void test_mptcp_sockmap(void) +{ + struct mptcp_sockmap *skel; + struct netns_obj *netns; + int cgroup_fd, err; + + cgroup_fd = test__join_cgroup("/mptcp_sockmap"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap")) + return; + + skel = mptcp_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap")) + goto close_cgroup; + + skel->links.mptcp_sockmap_inject = + bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap")) + goto skel_destroy; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect), + bpf_map__fd(skel->maps.sock_map), + 
BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + test_sockmap_with_mptcp_fallback(skel); + test_sockmap_reject_mptcp(skel); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_sockmap__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -444,4 +583,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("sockmap")) + test_mptcp_sockmap(); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c new file mode 100644 index 0000000000000..d4eef0cbadb9f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +int sk_index; +int redirect_idx; +int trace_port; +int helper_ret; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} sock_map SEC(".maps"); + +SEC("sockops") +int mptcp_sockmap_inject(struct bpf_sock_ops *skops) +{ + struct bpf_sock *sk; + + /* only accept specified connection */ + if (skops->local_port != trace_port || + skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + /* update sk handler */ + helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST); + + return 1; +} + +SEC("sk_skb/stream_verdict") +int mptcp_sockmap_redirect(struct __sk_buff *skb) +{ + /* redirect skb to the sk under sock_map[redirect_idx] */ + return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0); +} From 
d528c8b22aff7920f6717d42b96508fc00799222 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Tue, 11 Nov 2025 14:43:24 +0800 Subject: [PATCH 803/867] ipv4: route: Prevent rt_bind_exception() from rebinding stale fnhe The sit driver's packet transmission path calls: sit_tunnel_xmit() -> update_or_create_fnhe(), which leads to fnhe_remove_oldest() being called to delete entries exceeding FNHE_RECLAIM_DEPTH+random. The race window is between fnhe_remove_oldest() selecting fnheX for deletion and the subsequent kfree_rcu(). During this time, the concurrent path's __mkroute_output() -> find_exception() can fetch the soon-to-be-deleted fnheX, and rt_bind_exception() then binds it with a new dst using a dst_hold(). When the original fnheX is freed via RCU, the dst reference remains permanently leaked. CPU 0 CPU 1 __mkroute_output() find_exception() [fnheX] update_or_create_fnhe() fnhe_remove_oldest() [fnheX] rt_bind_exception() [bind dst] RCU callback [fnheX freed, dst leak] This issue manifests as a device reference count leak and a warning in dmesg when unregistering the net device: unregister_netdevice: waiting for sitX to become free. Usage count = N Ido Schimmel provided the simple test validation method [1]. The fix clears 'oldest->fnhe_daddr' before calling fnhe_flush_routes(). Since rt_bind_exception() checks this field, setting it to zero prevents the stale fnhe from being reused and bound to a new dst just before it is freed. 
[1] ip netns add ns1 ip -n ns1 link set dev lo up ip -n ns1 address add 192.0.2.1/32 dev lo ip -n ns1 link add name dummy1 up type dummy ip -n ns1 route add 192.0.2.2/32 dev dummy1 ip -n ns1 link add name gretap1 up arp off type gretap \ local 192.0.2.1 remote 192.0.2.2 ip -n ns1 route add 198.51.0.0/16 dev gretap1 taskset -c 0 ip netns exec ns1 mausezahn gretap1 \ -A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q & taskset -c 2 ip netns exec ns1 mausezahn gretap1 \ -A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q & sleep 10 ip netns pids ns1 | xargs kill ip netns del ns1 Cc: stable@vger.kernel.org Fixes: 67d6d681e15b ("ipv4: make exception cache less predictible") Signed-off-by: Chuang Wang Signed-off-by: NipaLocal --- net/ipv4/route.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6d27d3610c1cd..b549d6a573073 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -607,6 +607,11 @@ static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) oldest_p = fnhe_p; } } + + /* Clear oldest->fnhe_daddr to prevent this fnhe from being + * rebound with new dsts in rt_bind_exception(). + */ + oldest->fnhe_daddr = 0; fnhe_flush_routes(oldest); *oldest_p = oldest->fnhe_next; kfree_rcu(oldest, rcu); From 17464bd626fda622b419a4ca2a783578f558fe7a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:11:47 +0000 Subject: [PATCH 804/867] net: stmmac: loongson1: use PHY_INTF_SEL_x Use PHY_INTF_SEL_x definitions for phy_intf_sel bitfield. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c index 32b5d1492e2e9..09e2af1d778a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c @@ -38,8 +38,8 @@ #define GMAC_SHUT BIT(6) #define PHY_INTF_SELI GENMASK(30, 28) -#define PHY_INTF_MII FIELD_PREP(PHY_INTF_SELI, 0) -#define PHY_INTF_RMII FIELD_PREP(PHY_INTF_SELI, 4) +#define PHY_INTF_MII FIELD_PREP(PHY_INTF_SELI, PHY_INTF_SEL_GMII_MII) +#define PHY_INTF_RMII FIELD_PREP(PHY_INTF_SELI, PHY_INTF_SEL_RMII) struct ls1x_dwmac { struct plat_stmmacenet_data *plat_dat; From 8953d4b7d4cc96ec795d32b5396f2a32bce65afd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:11:52 +0000 Subject: [PATCH 805/867] net: stmmac: loongson1: use PHY_INTF_SEL_x directly Use the PHY_INTF_SEL_xx values directly in ls1c_dwmac_syscon_init(), converting them to the PHY_INTF_SELI bitfield when calling regmap_update_bits(). 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c index 09e2af1d778a9..5f9f66fbc1917 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c @@ -38,8 +38,6 @@ #define GMAC_SHUT BIT(6) #define PHY_INTF_SELI GENMASK(30, 28) -#define PHY_INTF_MII FIELD_PREP(PHY_INTF_SELI, PHY_INTF_SEL_GMII_MII) -#define PHY_INTF_RMII FIELD_PREP(PHY_INTF_SELI, PHY_INTF_SEL_RMII) struct ls1x_dwmac { struct plat_stmmacenet_data *plat_dat; @@ -140,15 +138,14 @@ static int ls1c_dwmac_syscon_init(struct platform_device *pdev, void *priv) struct ls1x_dwmac *dwmac = priv; struct plat_stmmacenet_data *plat = dwmac->plat_dat; struct regmap *regmap = dwmac->regmap; + int phy_intf_sel; switch (plat->phy_interface) { case PHY_INTERFACE_MODE_MII: - regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI, - PHY_INTF_MII); + phy_intf_sel = PHY_INTF_SEL_GMII_MII; break; case PHY_INTERFACE_MODE_RMII: - regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI, - PHY_INTF_RMII); + phy_intf_sel = PHY_INTF_SEL_RMII; break; default: dev_err(&pdev->dev, "Unsupported PHY-mode %u\n", @@ -156,6 +153,8 @@ static int ls1c_dwmac_syscon_init(struct platform_device *pdev, void *priv) return -EOPNOTSUPP; } + regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI, + FIELD_PREP(PHY_INTF_SELI, phy_intf_sel)); regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0); return 0; From 2fbf6c1aad7c010370d82cc605a155f99a44a617 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:11:57 +0000 Subject: [PATCH 806/867] net: stmmac: loongson1: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result and 
use that to set the control register to select the operating mode for the DWMAC core. Note that this will allow GMII as well as MII as the phy_intf_sel value is the same for both. Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c index 5f9f66fbc1917..894ee66f5c9bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c @@ -140,14 +140,9 @@ static int ls1c_dwmac_syscon_init(struct platform_device *pdev, void *priv) struct regmap *regmap = dwmac->regmap; int phy_intf_sel; - switch (plat->phy_interface) { - case PHY_INTERFACE_MODE_MII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; - break; - case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = PHY_INTF_SEL_RMII; - break; - default: + phy_intf_sel = stmmac_get_phy_intf_sel(plat->phy_interface); + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RMII) { dev_err(&pdev->dev, "Unsupported PHY-mode %u\n", plat->phy_interface); return -EOPNOTSUPP; From 1e411a966a6d527511566ff8eaf11f62a9786349 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:02 +0000 Subject: [PATCH 807/867] net: stmmac: mediatek: use PHY_INTF_SEL_x Use PHY_INTF_SEL_x definitions for the fields that correspond to the phy_intf_sel inputs to the dwmac core. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index f1b36f0a401de..dcdf28418fecc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -17,9 +17,6 @@ /* Peri Configuration register for mt2712 */ #define PERI_ETH_PHY_INTF_SEL 0x418 -#define PHY_INTF_MII 0 -#define PHY_INTF_RGMII 1 -#define PHY_INTF_RMII 4 #define RMII_CLK_SRC_RXC BIT(4) #define RMII_CLK_SRC_INTERNAL BIT(5) @@ -118,16 +115,16 @@ static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: - intf_val |= PHY_INTF_MII; + intf_val |= PHY_INTF_SEL_GMII_MII; break; case PHY_INTERFACE_MODE_RMII: - intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac); + intf_val |= PHY_INTF_SEL_RMII | rmii_rxc | rmii_clk_from_mac; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: - intf_val |= PHY_INTF_RGMII; + intf_val |= PHY_INTF_SEL_RGMII; break; default: dev_err(plat->dev, "phy interface not supported\n"); @@ -297,17 +294,18 @@ static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat) /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: - intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_MII); + intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, + PHY_INTF_SEL_GMII_MII); break; case PHY_INTERFACE_MODE_RMII: - intf_val |= (rmii_rxc | rmii_clk_from_mac); - intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_RMII); + intf_val |= rmii_rxc | rmii_clk_from_mac; + intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_SEL_RMII); 
break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: - intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_RGMII); + intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_SEL_RGMII); break; default: dev_err(plat->dev, "phy interface not supported\n"); From 36eec55a29837eefe1cb88de68ad6a657c4319ce Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:07 +0000 Subject: [PATCH 808/867] net: stmmac: mediatek: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result, and pass that into the implementation specific ->dwmac_set_phy_interface() method. Use this to configure the PHY interface selection field. Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index dcdf28418fecc..0f32732efb753 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -85,7 +85,8 @@ struct mediatek_dwmac_plat_data { }; struct mediatek_dwmac_variant { - int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); + int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat, + u8 phy_intf_sel); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); /* clock ids to be requested */ @@ -106,25 +107,25 @@ static const char * const mt8195_dwmac_clk_l[] = { "axi", "apb", "mac_cg", "mac_main", "ptp_ref" }; -static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) +static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat, + u8 phy_intf_sel) { int rmii_clk_from_mac = plat->rmii_clk_from_mac ? 
RMII_CLK_SRC_INTERNAL : 0; int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; - u32 intf_val = 0; + u32 intf_val; + + intf_val = phy_intf_sel; /* select phy interface in top control domain */ switch (plat->phy_mode) { - case PHY_INTERFACE_MODE_MII: - intf_val |= PHY_INTF_SEL_GMII_MII; - break; case PHY_INTERFACE_MODE_RMII: - intf_val |= PHY_INTF_SEL_RMII | rmii_rxc | rmii_clk_from_mac; + intf_val |= rmii_rxc | rmii_clk_from_mac; break; + case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: - intf_val |= PHY_INTF_SEL_RGMII; break; default: dev_err(plat->dev, "phy interface not supported\n"); @@ -285,27 +286,25 @@ static const struct mediatek_dwmac_variant mt2712_gmac_variant = { .tx_delay_max = 17600, }; -static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat) +static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat, + u8 phy_intf_sel) { int rmii_clk_from_mac = plat->rmii_clk_from_mac ? MT8195_RMII_CLK_SRC_INTERNAL : 0; int rmii_rxc = plat->rmii_rxc ? 
MT8195_RMII_CLK_SRC_RXC : 0; - u32 intf_val = 0; + u32 intf_val; + + intf_val = FIELD_PREP(MT8195_ETH_INTF_SEL, phy_intf_sel); /* select phy interface in top control domain */ switch (plat->phy_mode) { - case PHY_INTERFACE_MODE_MII: - intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, - PHY_INTF_SEL_GMII_MII); - break; case PHY_INTERFACE_MODE_RMII: intf_val |= rmii_rxc | rmii_clk_from_mac; - intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_SEL_RMII); break; + case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: - intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_SEL_RGMII); break; default: dev_err(plat->dev, "phy interface not supported\n"); @@ -525,10 +524,18 @@ static int mediatek_dwmac_init(struct device *dev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; const struct mediatek_dwmac_variant *variant = plat->variant; - int ret; + int phy_intf_sel, ret; if (variant->dwmac_set_phy_interface) { - ret = variant->dwmac_set_phy_interface(plat); + phy_intf_sel = stmmac_get_phy_intf_sel(plat->phy_mode); + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) { + dev_err(plat->dev, "phy interface not supported\n"); + return phy_intf_sel < 0 ? phy_intf_sel : -EINVAL; + } + + ret = variant->dwmac_set_phy_interface(plat, phy_intf_sel); if (ret) { dev_err(dev, "failed to set phy interface, err = %d\n", ret); return ret; From 5a4858442e549cd8b29f9780eac7772c5059d00d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:12 +0000 Subject: [PATCH 809/867] net: stmmac: mediatek: simplify set_interface() methods Use the phy_intf_sel field value when deciding what other options to apply for the configuration register. Note that this will allow GMII as well as MII as the phy_intf_sel value is the same for both. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 50 +++++-------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 0f32732efb753..1f2d7d19ca56e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -110,26 +110,13 @@ static const char * const mt8195_dwmac_clk_l[] = { static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat, u8 phy_intf_sel) { - int rmii_clk_from_mac = plat->rmii_clk_from_mac ? RMII_CLK_SRC_INTERNAL : 0; - int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; - u32 intf_val; + u32 intf_val = phy_intf_sel; - intf_val = phy_intf_sel; - - /* select phy interface in top control domain */ - switch (plat->phy_mode) { - case PHY_INTERFACE_MODE_RMII: - intf_val |= rmii_rxc | rmii_clk_from_mac; - break; - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_TXID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_ID: - break; - default: - dev_err(plat->dev, "phy interface not supported\n"); - return -EINVAL; + if (phy_intf_sel == PHY_INTF_SEL_RMII) { + if (plat->rmii_clk_from_mac) + intf_val |= RMII_CLK_SRC_INTERNAL; + if (plat->rmii_rxc) + intf_val |= RMII_CLK_SRC_RXC; } regmap_write(plat->peri_regmap, PERI_ETH_PHY_INTF_SEL, intf_val); @@ -289,26 +276,13 @@ static const struct mediatek_dwmac_variant mt2712_gmac_variant = { static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat, u8 phy_intf_sel) { - int rmii_clk_from_mac = plat->rmii_clk_from_mac ? MT8195_RMII_CLK_SRC_INTERNAL : 0; - int rmii_rxc = plat->rmii_rxc ? 
MT8195_RMII_CLK_SRC_RXC : 0; - u32 intf_val; + u32 intf_val = FIELD_PREP(MT8195_ETH_INTF_SEL, phy_intf_sel); - intf_val = FIELD_PREP(MT8195_ETH_INTF_SEL, phy_intf_sel); - - /* select phy interface in top control domain */ - switch (plat->phy_mode) { - case PHY_INTERFACE_MODE_RMII: - intf_val |= rmii_rxc | rmii_clk_from_mac; - break; - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_TXID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_ID: - break; - default: - dev_err(plat->dev, "phy interface not supported\n"); - return -EINVAL; + if (phy_intf_sel == PHY_INTF_SEL_RMII) { + if (plat->rmii_clk_from_mac) + intf_val |= MT8195_RMII_CLK_SRC_INTERNAL; + if (plat->rmii_rxc) + intf_val |= MT8195_RMII_CLK_SRC_RXC; } /* MT8195 only support external PHY */ From 7752c2267ba769dbc7fce882cd3c3949be237d78 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:18 +0000 Subject: [PATCH 810/867] net: stmmac: starfive: use PHY_INTF_SEL_x to select PHY interface Use the common dwmac definitions for the PHY interface selection field. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 6938dd2a79b77..1ef72576c6f1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -15,8 +15,8 @@ #include "stmmac_platform.h" -#define STARFIVE_DWMAC_PHY_INFT_RGMII 0x1 -#define STARFIVE_DWMAC_PHY_INFT_RMII 0x4 +#define STARFIVE_DWMAC_PHY_INFT_RGMII PHY_INTF_SEL_RGMII +#define STARFIVE_DWMAC_PHY_INFT_RMII PHY_INTF_SEL_RMII #define STARFIVE_DWMAC_PHY_INFT_FIELD 0x7U #define JH7100_SYSMAIN_REGISTER49_DLYCHAIN 0xc8 From de601c9d10ad08371e418b647bb404ac886fd8a2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:23 +0000 Subject: [PATCH 811/867] net: stmmac: starfive: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result and use that to set the control register to select the operating mode for the DWMAC core. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../ethernet/stmicro/stmmac/dwmac-starfive.c | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 1ef72576c6f1f..16b955a6d77ba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -15,8 +15,6 @@ #include "stmmac_platform.h" -#define STARFIVE_DWMAC_PHY_INFT_RGMII PHY_INTF_SEL_RGMII -#define STARFIVE_DWMAC_PHY_INFT_RMII PHY_INTF_SEL_RMII #define STARFIVE_DWMAC_PHY_INFT_FIELD 0x7U #define JH7100_SYSMAIN_REGISTER49_DLYCHAIN 0xc8 @@ -35,25 +33,15 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat) struct starfive_dwmac *dwmac = plat_dat->bsp_priv; struct regmap *regmap; unsigned int args[2]; - unsigned int mode; + int phy_intf_sel; int err; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RMII: - mode = STARFIVE_DWMAC_PHY_INFT_RMII; - break; - - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - mode = STARFIVE_DWMAC_PHY_INFT_RGMII; - break; - - default: + phy_intf_sel = stmmac_get_phy_intf_sel(plat_dat->phy_interface); + if (phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) { dev_err(dwmac->dev, "unsupported interface %s\n", phy_modes(plat_dat->phy_interface)); - return -EINVAL; + return phy_intf_sel < 0 ? 
phy_intf_sel : -EINVAL; } regmap = syscon_regmap_lookup_by_phandle_args(dwmac->dev->of_node, @@ -65,7 +53,7 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat) /* args[0]:offset args[1]: shift */ err = regmap_update_bits(regmap, args[0], STARFIVE_DWMAC_PHY_INFT_FIELD << args[1], - mode << args[1]); + phy_intf_sel << args[1]); if (err) return dev_err_probe(dwmac->dev, err, "error setting phy mode\n"); From 7144d4e924010a46e13eb00ad4db302a2e9038a0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:28 +0000 Subject: [PATCH 812/867] net: stmmac: stm32: use PHY_INTF_SEL_x to select PHY interface Convert dwmac-stm32 to use the PHY_INTF_SEL_x definitions. For stm32mp1, the original definitions used constant 0 (GMII, 0 << 21), BIT(21) (RGMII, 1 << 21) and BIT(23) (RMII, 4 << 21) to define these, but from the values it can be clearly seen that these are the PHY_INTF_SEL_x inputs to the dwmac. For stm32mp2, the original definitions cover a bitfield 6:4 in the SYSCFG Ethernet1 control register (according to documentation) and use the PHY_INTF_SEL_x values. Use the common dwmac definitions for the PHY interface selection field by adding the bitfield mask, and using FIELD_PREP() for the bitfield values. This removes this incorrect use of BIT(). 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../net/ethernet/stmicro/stmmac/dwmac-stm32.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 6c179911ef3f1..1545772419d55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -47,9 +47,13 @@ *------------------------------------------ */ #define SYSCFG_PMCR_ETH_SEL_MII BIT(20) -#define SYSCFG_PMCR_ETH_SEL_RGMII BIT(21) -#define SYSCFG_PMCR_ETH_SEL_RMII BIT(23) -#define SYSCFG_PMCR_ETH_SEL_GMII 0 +#define SYSCFG_PMCR_PHY_INTF_SEL_MASK GENMASK(23, 21) +#define SYSCFG_PMCR_ETH_SEL_RGMII \ + FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, PHY_INTF_SEL_RGMII) +#define SYSCFG_PMCR_ETH_SEL_RMII \ + FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, PHY_INTF_SEL_RMII) +#define SYSCFG_PMCR_ETH_SEL_GMII \ + FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, PHY_INTF_SEL_GMII_MII) #define SYSCFG_MCU_ETH_SEL_MII 0 #define SYSCFG_MCU_ETH_SEL_RMII 1 @@ -60,9 +64,13 @@ #define SYSCFG_ETHCR_ETH_CLK_SEL BIT(1) #define SYSCFG_ETHCR_ETH_REF_CLK_SEL BIT(0) -#define SYSCFG_ETHCR_ETH_SEL_MII 0 -#define SYSCFG_ETHCR_ETH_SEL_RGMII BIT(4) -#define SYSCFG_ETHCR_ETH_SEL_RMII BIT(6) +#define SYSCFG_ETHCR_ETH_SEL_MASK GENMASK(6, 4) +#define SYSCFG_ETHCR_ETH_SEL_MII FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, \ + PHY_INTF_SEL_GMII_MII) +#define SYSCFG_ETHCR_ETH_SEL_RGMII FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, \ + PHY_INTF_SEL_RGMII) +#define SYSCFG_ETHCR_ETH_SEL_RMII FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, \ + PHY_INTF_SEL_RMII) /* STM32MPx register definitions * From bf94985b3dbae9f64dfb472f7348a4a0b3b9f527 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:33 +0000 Subject: [PATCH 813/867] net: stmmac: stm32: use PHY_INTF_SEL_x directly Rather than defining separate constants for each, use the PHY_INTF_SEL_x 
definitions in the switch()es configuring the control register, and use one FIELD_PREP() to convert phy_intf_sel to the register value. Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../net/ethernet/stmicro/stmmac/dwmac-stm32.c | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 1545772419d55..18d26f096f5f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -48,30 +48,17 @@ */ #define SYSCFG_PMCR_ETH_SEL_MII BIT(20) #define SYSCFG_PMCR_PHY_INTF_SEL_MASK GENMASK(23, 21) -#define SYSCFG_PMCR_ETH_SEL_RGMII \ - FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, PHY_INTF_SEL_RGMII) -#define SYSCFG_PMCR_ETH_SEL_RMII \ - FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, PHY_INTF_SEL_RMII) -#define SYSCFG_PMCR_ETH_SEL_GMII \ - FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, PHY_INTF_SEL_GMII_MII) #define SYSCFG_MCU_ETH_SEL_MII 0 #define SYSCFG_MCU_ETH_SEL_RMII 1 /* STM32MP2 register definitions */ #define SYSCFG_MP2_ETH_MASK GENMASK(31, 0) +#define SYSCFG_ETHCR_ETH_SEL_MASK GENMASK(6, 4) #define SYSCFG_ETHCR_ETH_PTP_CLK_SEL BIT(2) #define SYSCFG_ETHCR_ETH_CLK_SEL BIT(1) #define SYSCFG_ETHCR_ETH_REF_CLK_SEL BIT(0) -#define SYSCFG_ETHCR_ETH_SEL_MASK GENMASK(6, 4) -#define SYSCFG_ETHCR_ETH_SEL_MII FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, \ - PHY_INTF_SEL_GMII_MII) -#define SYSCFG_ETHCR_ETH_SEL_RGMII FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, \ - PHY_INTF_SEL_RGMII) -#define SYSCFG_ETHCR_ETH_SEL_RMII FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, \ - PHY_INTF_SEL_RMII) - /* STM32MPx register definitions * * Below table summarizes the clock requirement and clock sources for @@ -244,10 +231,12 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; u32 reg = dwmac->mode_reg; + u8 phy_intf_sel; int val = 0; switch 
(plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: + phy_intf_sel = PHY_INTF_SEL_GMII_MII; /* * STM32MP15xx supports both MII and GMII, STM32MP13xx MII only. * SYSCFG_PMCSETR ETH_SELMII is present only on STM32MP15xx and @@ -258,12 +247,12 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) val |= SYSCFG_PMCR_ETH_SEL_MII; break; case PHY_INTERFACE_MODE_GMII: - val = SYSCFG_PMCR_ETH_SEL_GMII; + phy_intf_sel = PHY_INTF_SEL_GMII_MII; if (dwmac->enable_eth_ck) val |= SYSCFG_PMCR_ETH_CLK_SEL; break; case PHY_INTERFACE_MODE_RMII: - val = SYSCFG_PMCR_ETH_SEL_RMII; + phy_intf_sel = PHY_INTF_SEL_RMII; if (dwmac->enable_eth_ck) val |= SYSCFG_PMCR_ETH_REF_CLK_SEL; break; @@ -271,7 +260,7 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - val = SYSCFG_PMCR_ETH_SEL_RGMII; + phy_intf_sel = PHY_INTF_SEL_RGMII; if (dwmac->enable_eth_ck) val |= SYSCFG_PMCR_ETH_CLK_SEL; break; @@ -284,6 +273,8 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); + val |= FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, phy_intf_sel); + /* Shift value at correct ethernet MAC offset in SYSCFG_PMCSETR */ val <<= ffs(dwmac->mode_mask) - ffs(SYSCFG_MP1_ETH_MASK); @@ -299,6 +290,7 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; + u8 phy_intf_sel = PHY_INTF_SEL_GMII_MII; u32 reg = dwmac->mode_reg; int val = 0; @@ -307,7 +299,7 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) /* ETH_REF_CLK_SEL bit in SYSCFG register is not applicable in MII mode */ break; case PHY_INTERFACE_MODE_RMII: - val = SYSCFG_ETHCR_ETH_SEL_RMII; + phy_intf_sel = PHY_INTF_SEL_RMII; if (dwmac->enable_eth_ck) 
{ /* Internal clock ETH_CLK of 50MHz from RCC is used */ val |= SYSCFG_ETHCR_ETH_REF_CLK_SEL; @@ -317,7 +309,7 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - val = SYSCFG_ETHCR_ETH_SEL_RGMII; + phy_intf_sel = PHY_INTF_SEL_RGMII; fallthrough; case PHY_INTERFACE_MODE_GMII: if (dwmac->enable_eth_ck) { @@ -334,6 +326,8 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); + val |= FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, phy_intf_sel); + /* Select PTP (IEEE1588) clock selection from RCC (ck_ker_ethxptp) */ val |= SYSCFG_ETHCR_ETH_PTP_CLK_SEL; From 61aeead1cd24cd1f8aa0110329b069ff8081721b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:38 +0000 Subject: [PATCH 814/867] net: stmmac: stm32: use stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value. As both configure functions would end up with the same code, call this from stm32mp1_set_mode(), validate the result and pass the resulting value into the stm32 configure function. Use this value to set the operating mode for the DWMAC core. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../net/ethernet/stmicro/stmmac/dwmac-stm32.c | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 18d26f096f5f7..e1b260ed4790b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -227,16 +227,17 @@ static int stm32mp1_validate_ethck_rate(struct plat_stmmacenet_data *plat_dat) return -EINVAL; } -static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) +static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; u32 reg = dwmac->mode_reg; - u8 phy_intf_sel; - int val = 0; + int val; + + val = FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, phy_intf_sel); switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; /* * STM32MP15xx supports both MII and GMII, STM32MP13xx MII only. 
* SYSCFG_PMCSETR ETH_SELMII is present only on STM32MP15xx and @@ -247,12 +248,10 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) val |= SYSCFG_PMCR_ETH_SEL_MII; break; case PHY_INTERFACE_MODE_GMII: - phy_intf_sel = PHY_INTF_SEL_GMII_MII; if (dwmac->enable_eth_ck) val |= SYSCFG_PMCR_ETH_CLK_SEL; break; case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = PHY_INTF_SEL_RMII; if (dwmac->enable_eth_ck) val |= SYSCFG_PMCR_ETH_REF_CLK_SEL; break; @@ -260,7 +259,6 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - phy_intf_sel = PHY_INTF_SEL_RGMII; if (dwmac->enable_eth_ck) val |= SYSCFG_PMCR_ETH_CLK_SEL; break; @@ -273,8 +271,6 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); - val |= FIELD_PREP(SYSCFG_PMCR_PHY_INTF_SEL_MASK, phy_intf_sel); - /* Shift value at correct ethernet MAC offset in SYSCFG_PMCSETR */ val <<= ffs(dwmac->mode_mask) - ffs(SYSCFG_MP1_ETH_MASK); @@ -287,19 +283,20 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) dwmac->mode_mask, val); } -static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) +static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat, + u8 phy_intf_sel) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; - u8 phy_intf_sel = PHY_INTF_SEL_GMII_MII; u32 reg = dwmac->mode_reg; - int val = 0; + int val; + + val = FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, phy_intf_sel); switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: /* ETH_REF_CLK_SEL bit in SYSCFG register is not applicable in MII mode */ break; case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = PHY_INTF_SEL_RMII; if (dwmac->enable_eth_ck) { /* Internal clock ETH_CLK of 50MHz from RCC is used */ val |= SYSCFG_ETHCR_ETH_REF_CLK_SEL; @@ -309,8 +306,6 @@ static int 
stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - phy_intf_sel = PHY_INTF_SEL_RGMII; - fallthrough; case PHY_INTERFACE_MODE_GMII: if (dwmac->enable_eth_ck) { /* Internal clock ETH_CLK of 125MHz from RCC is used */ @@ -326,8 +321,6 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); - val |= FIELD_PREP(SYSCFG_ETHCR_ETH_SEL_MASK, phy_intf_sel); - /* Select PTP (IEEE1588) clock selection from RCC (ck_ker_ethxptp) */ val |= SYSCFG_ETHCR_ETH_PTP_CLK_SEL; @@ -339,7 +332,7 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; - int ret; + int phy_intf_sel, ret; ret = stm32mp1_select_ethck_external(plat_dat); if (ret) @@ -349,10 +342,19 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat) if (ret) return ret; + phy_intf_sel = stmmac_get_phy_intf_sel(plat_dat->phy_interface); + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) { + dev_err(dwmac->dev, "Mode %s not supported\n", + phy_modes(plat_dat->phy_interface)); + return phy_intf_sel < 0 ? 
phy_intf_sel : -EINVAL; + } + if (!dwmac->ops->is_mp2) - return stm32mp1_configure_pmcr(plat_dat); + return stm32mp1_configure_pmcr(plat_dat, phy_intf_sel); else - return stm32mp2_configure_syscfg(plat_dat); + return stm32mp2_configure_syscfg(plat_dat, phy_intf_sel); } static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat) From 9b056ba801c0d6ad867a1643305df3bbbcc59c77 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:43 +0000 Subject: [PATCH 815/867] net: stmmac: visconti: use PHY_INTF_SEL_x to select PHY interface Convert dwmac-visconti to use the PHY_INTF_SEL_x definitions. The original definitions used constant 0, BIT(0) (==1) and BIT(2) (==4) to define these, but the values of the bits correspond with the PHY_INTF_SEL_x values, so it is highly likely that these are not individual bits, but the PHY_INTF_SEL_x bitfield. This removes this incorrect use of BIT(). Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index bd65d42390543..7b6b048e1be05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -42,9 +42,9 @@ #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN) -#define ETHER_CONFIG_INTF_MII 0 -#define ETHER_CONFIG_INTF_RGMII BIT(0) -#define ETHER_CONFIG_INTF_RMII BIT(2) +#define ETHER_CONFIG_INTF_MII PHY_INTF_SEL_GMII_MII +#define ETHER_CONFIG_INTF_RGMII PHY_INTF_SEL_RGMII +#define ETHER_CONFIG_INTF_RMII PHY_INTF_SEL_RMII struct visconti_eth { void __iomem *reg; struct clk *phy_ref_clk; From e4022134e755daa2450cce634d4a9141b3f7b6ff Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 08:12:48 +0000 Subject: [PATCH 816/867] net: stmmac: visconti: use
stmmac_get_phy_intf_sel() Use stmmac_get_phy_intf_sel() to decode the PHY interface mode to the phy_intf_sel value, validate the result and use that to set the control register to select the operating mode for the DWMAC core. Note that this will allow GMII as well as MII as the phy_intf_sel value is the same for both. Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../ethernet/stmicro/stmmac/dwmac-visconti.c | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 7b6b048e1be05..9497b13a57539 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -42,10 +42,6 @@ #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN) -#define ETHER_CONFIG_INTF_MII PHY_INTF_SEL_GMII_MII -#define ETHER_CONFIG_INTF_RGMII PHY_INTF_SEL_RGMII -#define ETHER_CONFIG_INTF_RMII PHY_INTF_SEL_RMII - struct visconti_eth { void __iomem *reg; struct clk *phy_ref_clk; @@ -150,22 +146,12 @@ static int visconti_eth_init_hw(struct platform_device *pdev, struct plat_stmmac { struct visconti_eth *dwmac = plat_dat->bsp_priv; unsigned int clk_sel_val; - u32 phy_intf_sel; - - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - phy_intf_sel = ETHER_CONFIG_INTF_RGMII; - break; - case PHY_INTERFACE_MODE_MII: - phy_intf_sel = ETHER_CONFIG_INTF_MII; - break; - case PHY_INTERFACE_MODE_RMII: - phy_intf_sel = ETHER_CONFIG_INTF_RMII; - break; - default: + int phy_intf_sel; + + phy_intf_sel = stmmac_get_phy_intf_sel(plat_dat->phy_interface); + if (phy_intf_sel != PHY_INTF_SEL_GMII_MII && + phy_intf_sel != PHY_INTF_SEL_RGMII && + phy_intf_sel != PHY_INTF_SEL_RMII) { dev_err(&pdev->dev, "Unsupported phy-mode (%d)\n", 
plat_dat->phy_interface); return -EOPNOTSUPP; } From ee467b25e4674e5c505fad4277baf2b47509032e Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 11 Nov 2025 17:08:28 +0800 Subject: [PATCH 817/867] virtio-net: fix incorrect flags recording in big mode The purpose of commit 703eec1b2422 ("virtio_net: fixing XDP for fully checksummed packets handling") is to record the flags in advance, as their value may be overwritten in the XDP case. However, the flags recorded under big mode are incorrect, because in big mode, the passed buf does not point to the rx buffer, but rather to the page of the submitted buffer. This commit fixes this issue. For the small mode, the commit c11a49d58ad2 ("virtio_net: Fix mismatched buf address when unmapping for small packets") fixed it. Tested-by: Alyssa Ross Fixes: 703eec1b2422 ("virtio_net: fixing XDP for fully checksummed packets handling") Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: NipaLocal --- drivers/net/virtio_net.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9220eea24875f..cfa006b886887 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2631,22 +2631,28 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } - /* 1. Save the flags early, as the XDP program might overwrite them. + /* About the flags below: + * 1. Save the flags early, as the XDP program might overwrite them. * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID * stay valid after XDP processing. * 2. XDP doesn't work with partially checksummed packets (refer to * virtnet_xdp_set()), so packets marked as * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 
*/ - flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; - if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) { + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); - else if (vi->big_packets) + } else if (vi->big_packets) { + void *p = page_address((struct page *)buf); + + flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; skb = receive_big(dev, vi, rq, buf, len, stats); - else + } else { + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); + } if (unlikely(!skb)) return; From 6a46b482df6dc5cb36ef1c608856d7ce59ca3b92 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 11 Nov 2025 09:10:45 +0000 Subject: [PATCH 818/867] net: phy: mscc: Simplify LED mode update using phy_modify() The vsc85xx_led_cntl_set() function currently performs a manual read-modify-write sequence protected by the PHY lock to update the LED mode register (MSCC_PHY_LED_MODE_SEL). Replace this sequence with a call to phy_modify(), which already handles read-modify-write operations with proper locking inside the PHY core. 
Signed-off-by: Lad Prabhakar Reviewed-by: Andrew Lunn Signed-off-by: NipaLocal --- drivers/net/phy/mscc/mscc_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 8678ebf89cca5..032050ec0bc90 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -177,17 +177,10 @@ static int vsc85xx_led_cntl_set(struct phy_device *phydev, u8 led_num, u8 mode) { - int rc; - u16 reg_val; - - mutex_lock(&phydev->lock); - reg_val = phy_read(phydev, MSCC_PHY_LED_MODE_SEL); - reg_val &= ~LED_MODE_SEL_MASK(led_num); - reg_val |= LED_MODE_SEL(led_num, (u16)mode); - rc = phy_write(phydev, MSCC_PHY_LED_MODE_SEL, reg_val); - mutex_unlock(&phydev->lock); + u16 mask = LED_MODE_SEL_MASK(led_num); + u16 val = LED_MODE_SEL(led_num, mode); - return rc; + return phy_modify(phydev, MSCC_PHY_LED_MODE_SEL, mask, val); } static int vsc85xx_mdix_get(struct phy_device *phydev, u8 *mdix) From a1a8bbacaeb7c420b9cb59cde40b171e8832e96d Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 11 Nov 2025 09:10:46 +0000 Subject: [PATCH 819/867] net: phy: mscc: Consolidate probe functions into a common helper Unify the probe implementations of the VSC85xx PHY family into a single vsc85xx_probe_common() helper. The existing probe functions for the vsc85xx, vsc8514, vsc8574, and vsc8584 variants contained almost identical initialization logic, differing only in configuration parameters such as the number of LEDs, supported LED modes, hardware statistics, and PTP support. Introduce a vsc85xx_probe_config structure to describe the per-variant parameters, and move all common setup code into the shared helper. Each variant's probe function now defines a constant configuration instance and calls vsc85xx_probe_common(). Also mark the default LED mode array parameter as const to match its usage. 
Signed-off-by: Lad Prabhakar Reviewed-by: Andrew Lunn Signed-off-by: NipaLocal --- drivers/net/phy/mscc/mscc_main.c | 201 ++++++++++++++++--------------- 1 file changed, 106 insertions(+), 95 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 032050ec0bc90..0ae0199d28bbc 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -22,6 +22,24 @@ #include "mscc_serdes.h" #include "mscc.h" +struct vsc85xx_probe_config { + const struct vsc85xx_hw_stat *hw_stats; + u8 nleds; + u16 supp_led_modes; + size_t nstats; + bool use_package; + size_t shared_size; + bool has_ptp; + bool check_rate_magic; +}; + +static const u32 vsc85xx_default_led_modes_4[] = { + VSC8531_LINK_1000_ACTIVITY, + VSC8531_LINK_100_ACTIVITY, + VSC8531_LINK_ACTIVITY, + VSC8531_DUPLEX_COLLISION +}; + static const struct vsc85xx_hw_stat vsc85xx_hw_stats[] = { { .string = "phy_receive_errors", @@ -436,7 +454,7 @@ static int vsc85xx_dt_led_mode_get(struct phy_device *phydev, #endif /* CONFIG_OF_MDIO */ static int vsc85xx_dt_led_modes_get(struct phy_device *phydev, - u32 *default_mode) + const u32 *default_mode) { struct vsc8531_private *priv = phydev->priv; char led_dt_prop[28]; @@ -2211,12 +2229,12 @@ static int vsc85xx_config_inband(struct phy_device *phydev, unsigned int modes) reg_val); } -static int vsc8514_probe(struct phy_device *phydev) +static int vsc85xx_probe_common(struct phy_device *phydev, + const struct vsc85xx_probe_config *cfg, + const u32 *default_led_mode) { struct vsc8531_private *vsc8531; - u32 default_mode[4] = {VSC8531_LINK_1000_ACTIVITY, - VSC8531_LINK_100_ACTIVITY, VSC8531_LINK_ACTIVITY, - VSC8531_DUPLEX_COLLISION}; + int ret; vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); if (!vsc8531) @@ -2224,119 +2242,112 @@ static int vsc8514_probe(struct phy_device *phydev) phydev->priv = vsc8531; - vsc8584_get_base_addr(phydev); - devm_phy_package_join(&phydev->mdio.dev, phydev, - 
vsc8531->base_addr, 0); - - vsc8531->nleds = 4; - vsc8531->supp_led_modes = VSC85XX_SUPP_LED_MODES; - vsc8531->hw_stats = vsc85xx_hw_stats; - vsc8531->nstats = ARRAY_SIZE(vsc85xx_hw_stats); - vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats, - sizeof(u64), GFP_KERNEL); - if (!vsc8531->stats) - return -ENOMEM; - - return vsc85xx_dt_led_modes_get(phydev, default_mode); -} - -static int vsc8574_probe(struct phy_device *phydev) -{ - struct vsc8531_private *vsc8531; - u32 default_mode[4] = {VSC8531_LINK_1000_ACTIVITY, - VSC8531_LINK_100_ACTIVITY, VSC8531_LINK_ACTIVITY, - VSC8531_DUPLEX_COLLISION}; + /* Check rate magic if needed (only for non-package PHYs) */ + if (cfg->check_rate_magic) { + ret = vsc85xx_edge_rate_magic_get(phydev); + if (ret < 0) + return ret; - vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); - if (!vsc8531) - return -ENOMEM; + vsc8531->rate_magic = ret; + } - phydev->priv = vsc8531; + /* Set up package if needed */ + if (cfg->use_package) { + vsc8584_get_base_addr(phydev); + devm_phy_package_join(&phydev->mdio.dev, phydev, + vsc8531->base_addr, cfg->shared_size); + } - vsc8584_get_base_addr(phydev); - devm_phy_package_join(&phydev->mdio.dev, phydev, - vsc8531->base_addr, 0); + /* Configure LED settings */ + vsc8531->nleds = cfg->nleds; + vsc8531->supp_led_modes = cfg->supp_led_modes; - vsc8531->nleds = 4; - vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES; - vsc8531->hw_stats = vsc8584_hw_stats; - vsc8531->nstats = ARRAY_SIZE(vsc8584_hw_stats); + /* Configure hardware stats */ + vsc8531->hw_stats = cfg->hw_stats; + vsc8531->nstats = cfg->nstats; vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats, sizeof(u64), GFP_KERNEL); if (!vsc8531->stats) return -ENOMEM; - return vsc85xx_dt_led_modes_get(phydev, default_mode); -} - -static int vsc8584_probe(struct phy_device *phydev) -{ - struct vsc8531_private *vsc8531; - u32 default_mode[4] = {VSC8531_LINK_1000_ACTIVITY, - VSC8531_LINK_100_ACTIVITY, 
VSC8531_LINK_ACTIVITY, - VSC8531_DUPLEX_COLLISION}; - int ret; - - vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); - if (!vsc8531) - return -ENOMEM; - - phydev->priv = vsc8531; - - vsc8584_get_base_addr(phydev); - devm_phy_package_join(&phydev->mdio.dev, phydev, vsc8531->base_addr, - sizeof(struct vsc85xx_shared_private)); - - vsc8531->nleds = 4; - vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES; - vsc8531->hw_stats = vsc8584_hw_stats; - vsc8531->nstats = ARRAY_SIZE(vsc8584_hw_stats); - vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats, - sizeof(u64), GFP_KERNEL); - if (!vsc8531->stats) - return -ENOMEM; + /* PTP setup for VSC8584 */ + if (cfg->has_ptp) { + if (phy_package_probe_once(phydev)) { + ret = vsc8584_ptp_probe_once(phydev); + if (ret) + return ret; + } - if (phy_package_probe_once(phydev)) { - ret = vsc8584_ptp_probe_once(phydev); + ret = vsc8584_ptp_probe(phydev); if (ret) return ret; } - ret = vsc8584_ptp_probe(phydev); - if (ret) - return ret; + /* Parse LED modes from device tree */ + return vsc85xx_dt_led_modes_get(phydev, default_led_mode); +} - return vsc85xx_dt_led_modes_get(phydev, default_mode); +static int vsc8514_probe(struct phy_device *phydev) +{ + static const struct vsc85xx_probe_config vsc8514_cfg = { + .nleds = 4, + .supp_led_modes = VSC85XX_SUPP_LED_MODES, + .hw_stats = vsc85xx_hw_stats, + .nstats = ARRAY_SIZE(vsc85xx_hw_stats), + .use_package = true, + .shared_size = 0, + .has_ptp = false, + .check_rate_magic = false, + }; + + return vsc85xx_probe_common(phydev, &vsc8514_cfg, vsc85xx_default_led_modes_4); } -static int vsc85xx_probe(struct phy_device *phydev) +static int vsc8574_probe(struct phy_device *phydev) { - struct vsc8531_private *vsc8531; - int rate_magic; - u32 default_mode[2] = {VSC8531_LINK_1000_ACTIVITY, - VSC8531_LINK_100_ACTIVITY}; + static const struct vsc85xx_probe_config vsc8574_cfg = { + .nleds = 4, + .supp_led_modes = VSC8584_SUPP_LED_MODES, + .hw_stats = vsc8584_hw_stats, + 
.nstats = ARRAY_SIZE(vsc8584_hw_stats), + .use_package = true, + .shared_size = 0, + .has_ptp = false, + .check_rate_magic = false, + }; - rate_magic = vsc85xx_edge_rate_magic_get(phydev); - if (rate_magic < 0) - return rate_magic; + return vsc85xx_probe_common(phydev, &vsc8574_cfg, vsc85xx_default_led_modes_4); +} - vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); - if (!vsc8531) - return -ENOMEM; +static int vsc8584_probe(struct phy_device *phydev) +{ + static const struct vsc85xx_probe_config vsc8584_cfg = { + .nleds = 4, + .supp_led_modes = VSC8584_SUPP_LED_MODES, + .hw_stats = vsc8584_hw_stats, + .nstats = ARRAY_SIZE(vsc8584_hw_stats), + .use_package = true, + .shared_size = sizeof(struct vsc85xx_shared_private), + .has_ptp = true, + .check_rate_magic = false, + }; - phydev->priv = vsc8531; + return vsc85xx_probe_common(phydev, &vsc8584_cfg, vsc85xx_default_led_modes_4); +} - vsc8531->rate_magic = rate_magic; - vsc8531->nleds = 2; - vsc8531->supp_led_modes = VSC85XX_SUPP_LED_MODES; - vsc8531->hw_stats = vsc85xx_hw_stats; - vsc8531->nstats = ARRAY_SIZE(vsc85xx_hw_stats); - vsc8531->stats = devm_kcalloc(&phydev->mdio.dev, vsc8531->nstats, - sizeof(u64), GFP_KERNEL); - if (!vsc8531->stats) - return -ENOMEM; +static int vsc85xx_probe(struct phy_device *phydev) +{ + static const struct vsc85xx_probe_config vsc85xx_cfg = { + .nleds = 2, + .supp_led_modes = VSC85XX_SUPP_LED_MODES, + .hw_stats = vsc85xx_hw_stats, + .nstats = ARRAY_SIZE(vsc85xx_hw_stats), + .use_package = false, + .has_ptp = false, + .check_rate_magic = true, + }; - return vsc85xx_dt_led_modes_get(phydev, default_mode); + return vsc85xx_probe_common(phydev, &vsc85xx_cfg, vsc85xx_default_led_modes_4); } static void vsc85xx_remove(struct phy_device *phydev) From 04a0559b571e2beba8f33c66a0629fe8b2d2b8bd Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 11 Nov 2025 09:10:47 +0000 Subject: [PATCH 820/867] net: phy: mscc: Add support for PHY LED control Add support for the PHY 
LED controller in the MSCC VSC85xx driver. The implementation provides LED brightness and hardware control through the LED subsystem and integrates with the standard 'netdev' trigger. Introduce new register definitions for the LED behavior register (MSCC_PHY_LED_BEHAVIOR = 30) and the LED combine disable bits, which control whether LEDs indicate link-only or combined link and activity status. Implement a helper, vsc85xx_led_combine_disable_set(), to update these bits safely using phy_modify(). Add support for LED brightness control and hardware mode configuration. The new callbacks implement the standard LED class operations, allowing user control through sysfs. The brightness control maps to PHY LED force on/off modes. The hardware control get and set functions translate between the PHY-specific LED mode encodings and the LED subsystem TRIGGER_NETDEV_* rules. The combine feature is managed automatically based on the selected rules. When both RX and TX activity are disabled, the combine feature is turned off, causing LEDs to indicate link-only status. When either RX or TX activity is enabled, the combine feature remains active and LEDs indicate combined link and activity. Register the LED callbacks for all VSC85xx PHY variants so that the LED subsystem can manage their indicators consistently. Existing device tree LED configuration and default behavior are preserved. 
Signed-off-by: Lad Prabhakar Reviewed-by: Andrew Lunn Signed-off-by: NipaLocal --- drivers/net/phy/mscc/mscc.h | 4 + drivers/net/phy/mscc/mscc_main.c | 246 +++++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+) diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 2eef5956b9cc5..65c9d7bd93150 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -85,6 +85,10 @@ enum rgmii_clock_delay { #define LED_MODE_SEL_MASK(x) (GENMASK(3, 0) << LED_MODE_SEL_POS(x)) #define LED_MODE_SEL(x, mode) (((mode) << LED_MODE_SEL_POS(x)) & LED_MODE_SEL_MASK(x)) +#define MSCC_PHY_LED_BEHAVIOR 30 +#define LED_COMBINE_DIS_MASK(x) BIT(x) +#define LED_COMBINE_DIS(x, dis) (((dis) ? 1 : 0) << (x)) + #define MSCC_EXT_PAGE_CSR_CNTL_17 17 #define MSCC_EXT_PAGE_CSR_CNTL_18 18 diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 0ae0199d28bbc..28d5588076084 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -201,6 +201,15 @@ static int vsc85xx_led_cntl_set(struct phy_device *phydev, return phy_modify(phydev, MSCC_PHY_LED_MODE_SEL, mask, val); } +static int vsc85xx_led_combine_disable_set(struct phy_device *phydev, + u8 led_num, bool combine_disable) +{ + u16 mask = LED_COMBINE_DIS_MASK(led_num); + u16 val = LED_COMBINE_DIS(led_num, combine_disable); + + return phy_modify(phydev, MSCC_PHY_LED_BEHAVIOR, mask, val); +} + static int vsc85xx_mdix_get(struct phy_device *phydev, u8 *mdix) { u16 reg_val; @@ -2234,6 +2243,7 @@ static int vsc85xx_probe_common(struct phy_device *phydev, const u32 *default_led_mode) { struct vsc8531_private *vsc8531; + struct device_node *np; int ret; vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); @@ -2283,10 +2293,186 @@ static int vsc85xx_probe_common(struct phy_device *phydev, return ret; } + /* + * Check for LED configuration in device tree if available + * or fall back to default `vsc8531,led-x-mode` DT properties. 
+ */ + np = of_get_child_by_name(phydev->mdio.dev.of_node, "leds"); + if (np) { + of_node_put(np); + + /* Force to defaults */ + for (unsigned int i = 0; i < vsc8531->nleds; i++) + vsc8531->leds_mode[i] = default_led_mode[i]; + + return 0; + } + /* Parse LED modes from device tree */ return vsc85xx_dt_led_modes_get(phydev, default_led_mode); } +static int vsc85xx_led_brightness_set(struct phy_device *phydev, + u8 index, enum led_brightness value) +{ + struct vsc8531_private *vsc8531 = phydev->priv; + + if (index >= vsc8531->nleds) + return -EINVAL; + + return vsc85xx_led_cntl_set(phydev, index, value == LED_OFF ? + VSC8531_FORCE_LED_OFF : VSC8531_FORCE_LED_ON); +} + +static int vsc85xx_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + static const unsigned long supported = BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_RX) | + BIT(TRIGGER_NETDEV_TX); + struct vsc8531_private *vsc8531 = phydev->priv; + + if (index >= vsc8531->nleds) + return -EINVAL; + + if (rules & ~supported) + return -EOPNOTSUPP; + + return 0; +} + +static int vsc85xx_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + struct vsc8531_private *vsc8531 = phydev->priv; + u8 mode, behavior; + int rc; + + if (index >= vsc8531->nleds) + return -EINVAL; + + rc = phy_read(phydev, MSCC_PHY_LED_MODE_SEL); + if (rc < 0) + return rc; + mode = (rc & LED_MODE_SEL_MASK(index)) >> LED_MODE_SEL_POS(index); + + rc = phy_read(phydev, MSCC_PHY_LED_BEHAVIOR); + if (rc < 0) + return rc; + behavior = (rc & LED_COMBINE_DIS_MASK(index)) >> index; + + switch (mode) { + case VSC8531_LINK_ACTIVITY: + case VSC8531_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK); + break; + + case VSC8531_LINK_1000_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK); + break; + + case VSC8531_LINK_100_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK_100) | 
+ BIT(TRIGGER_NETDEV_LINK); + break; + + case VSC8531_LINK_10_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK); + break; + + case VSC8531_LINK_100_1000_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK); + break; + + case VSC8531_LINK_10_1000_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK); + break; + + case VSC8531_LINK_10_100_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK); + break; + + default: + *rules = 0; + break; + } + + if (!behavior && *rules) + *rules |= BIT(TRIGGER_NETDEV_RX) | BIT(TRIGGER_NETDEV_TX); + + return 0; +} + +static int vsc85xx_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + struct vsc8531_private *vsc8531 = phydev->priv; + u8 mode = VSC8531_FORCE_LED_ON; + bool combine_disable = false; + bool has_rx, has_tx; + int ret; + + if (index >= vsc8531->nleds) + return -EINVAL; + + if (rules & BIT(TRIGGER_NETDEV_LINK)) + mode = VSC8531_LINK_ACTIVITY; + + if (rules & BIT(TRIGGER_NETDEV_LINK_10)) + mode = VSC8531_LINK_10_ACTIVITY; + + if (rules & BIT(TRIGGER_NETDEV_LINK_100)) + mode = VSC8531_LINK_100_ACTIVITY; + + if (rules & BIT(TRIGGER_NETDEV_LINK_1000)) + mode = VSC8531_LINK_1000_ACTIVITY; + + if (rules & BIT(TRIGGER_NETDEV_LINK_100) && + rules & BIT(TRIGGER_NETDEV_LINK_1000)) + mode = VSC8531_LINK_100_1000_ACTIVITY; + + if (rules & BIT(TRIGGER_NETDEV_LINK_10) && + rules & BIT(TRIGGER_NETDEV_LINK_1000)) + mode = VSC8531_LINK_10_1000_ACTIVITY; + + if (rules & BIT(TRIGGER_NETDEV_LINK_10) && + rules & BIT(TRIGGER_NETDEV_LINK_100)) + mode = VSC8531_LINK_10_100_ACTIVITY; + + /* + * The VSC85xx PHYs provides an option to control LED behavior. By + * default, the LEDx combine function is enabled, meaning the LED + * will be on when there is link/activity or duplex/collision. 
If + * the combine function is disabled, the LED will be on only for + * link or duplex. + * + * To control this behavior, we check the selected rules. If both + * RX and TX activity are not selected, the LED combine function + * is disabled; otherwise, it remains enabled. + */ + has_rx = !!(rules & BIT(TRIGGER_NETDEV_RX)); + has_tx = !!(rules & BIT(TRIGGER_NETDEV_TX)); + if (!has_rx && !has_tx) + combine_disable = true; + + ret = vsc85xx_led_combine_disable_set(phydev, index, combine_disable); + if (ret < 0) + return ret; + + return vsc85xx_led_cntl_set(phydev, index, mode); +} + static int vsc8514_probe(struct phy_device *phydev) { static const struct vsc85xx_probe_config vsc8514_cfg = { @@ -2380,6 +2566,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8502, @@ -2404,6 +2594,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8504, @@ -2431,6 +2625,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8514, @@ -2456,6 +2654,10 @@ static 
struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8530, @@ -2480,6 +2682,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8531, @@ -2504,6 +2710,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8540, @@ -2528,6 +2738,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8541, @@ -2552,6 +2766,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_sset_count = &vsc85xx_get_sset_count, .get_strings = &vsc85xx_get_strings, .get_stats = &vsc85xx_get_stats, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + 
.led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8552, @@ -2578,6 +2796,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(PHY_ID_VSC856X), @@ -2601,6 +2823,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8572, @@ -2629,6 +2855,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { .phy_id = PHY_ID_VSC8574, @@ -2657,6 +2887,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(PHY_ID_VSC8575), @@ -2682,6 +2916,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = 
vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(PHY_ID_VSC8582), @@ -2707,6 +2945,10 @@ static struct phy_driver vsc85xx_driver[] = { .get_stats = &vsc85xx_get_stats, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(PHY_ID_VSC8584), @@ -2733,6 +2975,10 @@ static struct phy_driver vsc85xx_driver[] = { .link_change_notify = &vsc85xx_link_change_notify, .inband_caps = vsc85xx_inband_caps, .config_inband = vsc85xx_config_inband, + .led_brightness_set = vsc85xx_led_brightness_set, + .led_hw_is_supported = vsc85xx_led_hw_is_supported, + .led_hw_control_get = vsc85xx_led_hw_control_get, + .led_hw_control_set = vsc85xx_led_hw_control_set, } }; From 1269099337d4dbd9f429f3e9af73b3ab0a0f6140 Mon Sep 17 00:00:00 2001 From: javen Date: Tue, 11 Nov 2025 17:28:51 +0800 Subject: [PATCH 821/867] r8169: add support for RTL8125K This adds support for chip RTL8125K. Its XID is 0x68a. It is basically based on the one with XID 0x688, but with different firmware file. 
Signed-off-by: javen Signed-off-by: NipaLocal --- drivers/net/ethernet/realtek/r8169_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 2a4d9b5488103..0b96b6aa4214c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -57,6 +57,7 @@ #define FIRMWARE_8125B_2 "rtl_nic/rtl8125b-2.fw" #define FIRMWARE_8125D_1 "rtl_nic/rtl8125d-1.fw" #define FIRMWARE_8125D_2 "rtl_nic/rtl8125d-2.fw" +#define FIRMWARE_8125K_1 "rtl_nic/rtl8125k-1.fw" #define FIRMWARE_8125BP_2 "rtl_nic/rtl8125bp-2.fw" #define FIRMWARE_8126A_2 "rtl_nic/rtl8126a-2.fw" #define FIRMWARE_8126A_3 "rtl_nic/rtl8126a-3.fw" @@ -110,6 +111,7 @@ static const struct rtl_chip_info { { 0x7cf, 0x681, RTL_GIGA_MAC_VER_66, "RTL8125BP", FIRMWARE_8125BP_2 }, /* 8125D family. */ + { 0x7cf, 0x68a, RTL_GIGA_MAC_VER_64, "RTL8125K", FIRMWARE_8125K_1 }, { 0x7cf, 0x689, RTL_GIGA_MAC_VER_64, "RTL8125D", FIRMWARE_8125D_2 }, { 0x7cf, 0x688, RTL_GIGA_MAC_VER_64, "RTL8125D", FIRMWARE_8125D_1 }, @@ -770,6 +772,7 @@ MODULE_FIRMWARE(FIRMWARE_8125A_3); MODULE_FIRMWARE(FIRMWARE_8125B_2); MODULE_FIRMWARE(FIRMWARE_8125D_1); MODULE_FIRMWARE(FIRMWARE_8125D_2); +MODULE_FIRMWARE(FIRMWARE_8125K_1); MODULE_FIRMWARE(FIRMWARE_8125BP_2); MODULE_FIRMWARE(FIRMWARE_8126A_2); MODULE_FIRMWARE(FIRMWARE_8126A_3); From 6b81dff0bf217d4cdaf6691431e0f726526ed41b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:50 +0000 Subject: [PATCH 822/867] net_sched: make room for (struct qdisc_skb_cb)->pkt_segs Add a new u16 field, next to pkt_len : pkt_segs This will cache shinfo->gso_segs to speed up qdisc dequeue(). 
Move slave_dev_queue_mapping at the end of qdisc_skb_cb, and move three bits from tc_skb_cb : - post_ct - post_ct_snat - post_ct_dnat Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- include/net/sch_generic.h | 18 +++++++++--------- net/core/dev.c | 2 +- net/sched/act_ct.c | 8 ++++---- net/sched/cls_api.c | 6 +++--- net/sched/cls_flower.c | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 94966692ccdf5..9cd8b5d4b2369 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -429,13 +429,16 @@ struct tcf_proto { }; struct qdisc_skb_cb { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; + unsigned int pkt_len; + u16 pkt_segs; + u16 tc_classid; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; + + u16 slave_dev_queue_mapping; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -1064,11 +1067,8 @@ struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u32 drop_reason; - u16 zone; /* Only valid if post_ct = true */ + u16 zone; /* Only valid if qdisc_skb_cb(skb)->post_ct = true */ u16 mru; - u8 post_ct:1; - u8 post_ct_snat:1; - u8 post_ct_dnat:1; }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) diff --git a/net/core/dev.c b/net/core/dev.c index 69515edd17bc6..46ce6c6107805 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4355,7 +4355,7 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, return ret; tc_skb_cb(skb)->mru = 0; - tc_skb_cb(skb)->post_ct = false; + qdisc_skb_cb(skb)->post_ct = false; tcf_set_drop_reason(skb, *drop_reason); mini_qdisc_bstats_cpu_update(miniq, skb); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 6749a4a9a9cd0..2b6ac7069dc16 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -948,9 +948,9 @@ static int tcf_ct_act_nat(struct sk_buff *skb, return 
err & NF_VERDICT_MASK; if (action & BIT(NF_NAT_MANIP_SRC)) - tc_skb_cb(skb)->post_ct_snat = 1; + qdisc_skb_cb(skb)->post_ct_snat = 1; if (action & BIT(NF_NAT_MANIP_DST)) - tc_skb_cb(skb)->post_ct_dnat = 1; + qdisc_skb_cb(skb)->post_ct_dnat = 1; return err; #else @@ -986,7 +986,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_action_update_bstats(&c->common, skb); if (clear) { - tc_skb_cb(skb)->post_ct = false; + qdisc_skb_cb(skb)->post_ct = false; ct = nf_ct_get(skb, &ctinfo); if (ct) { nf_ct_put(ct); @@ -1097,7 +1097,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, out_push: skb_push_rcsum(skb, nh_ofs); - tc_skb_cb(skb)->post_ct = true; + qdisc_skb_cb(skb)->post_ct = true; tc_skb_cb(skb)->zone = p->zone; out_clear: if (defrag) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f751cd5eeac8d..ebca4b926dcf7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1872,9 +1872,9 @@ int tcf_classify(struct sk_buff *skb, } ext->chain = last_executed_chain; ext->mru = cb->mru; - ext->post_ct = cb->post_ct; - ext->post_ct_snat = cb->post_ct_snat; - ext->post_ct_dnat = cb->post_ct_dnat; + ext->post_ct = qdisc_skb_cb(skb)->post_ct; + ext->post_ct_snat = qdisc_skb_cb(skb)->post_ct_snat; + ext->post_ct_dnat = qdisc_skb_cb(skb)->post_ct_dnat; ext->zone = cb->zone; } } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 099ff6a3e1f51..7669371c1354c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -326,7 +326,7 @@ TC_INDIRECT_SCOPE int fl_classify(struct sk_buff *skb, struct tcf_result *res) { struct cls_fl_head *head = rcu_dereference_bh(tp->root); - bool post_ct = tc_skb_cb(skb)->post_ct; + bool post_ct = qdisc_skb_cb(skb)->post_ct; u16 zone = tc_skb_cb(skb)->zone; struct fl_flow_key skb_key; struct fl_flow_mask *mask; From 01cfdbc113c57536a31c64bd2ca5870070b0ac5a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:51 +0000 
Subject: [PATCH 823/867] net: init shinfo->gso_segs from qdisc_pkt_len_init() Qdisc use shinfo->gso_segs for their pkts stats in bstats_update(), but this field needs to be initialized for SKB_GSO_DODGY users. Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 46ce6c6107805..dba9eef8bd83d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4071,7 +4071,7 @@ EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); + struct skb_shared_info *shinfo = skb_shinfo(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; @@ -4112,6 +4112,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) if (payload <= 0) return; gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); + shinfo->gso_segs = gso_segs; } qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } From edad0cc121f5ed148fa2593e74386d41096e0d5a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:52 +0000 Subject: [PATCH 824/867] net_sched: initialize qdisc_skb_cb(skb)->pkt_segs in qdisc_pkt_len_init() qdisc_pkt_len_init() is currently initializing qdisc_skb_cb(skb)->pkt_len. Add qdisc_skb_cb(skb)->pkt_segs initialization and rename this function to qdisc_pkt_len_segs_init(). 
Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/core/dev.c | 15 +++++++++++---- net/sched/sch_cake.c | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index dba9eef8bd83d..895c3e37e686f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4069,17 +4069,23 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } EXPORT_SYMBOL_GPL(validate_xmit_skb_list); -static void qdisc_pkt_len_init(struct sk_buff *skb) +static void qdisc_pkt_len_segs_init(struct sk_buff *skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); + u16 gso_segs; qdisc_skb_cb(skb)->pkt_len = skb->len; + if (!shinfo->gso_size) { + qdisc_skb_cb(skb)->pkt_segs = 1; + return; + } + + qdisc_skb_cb(skb)->pkt_segs = gso_segs = shinfo->gso_segs; /* To get more precise estimation of bytes sent on wire, * we add to pkt_len the headers size of all segments */ - if (shinfo->gso_size && skb_transport_header_was_set(skb)) { - u16 gso_segs = shinfo->gso_segs; + if (skb_transport_header_was_set(skb)) { unsigned int hdr_len; /* mac layer + network layer */ @@ -4113,6 +4119,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) return; gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); shinfo->gso_segs = gso_segs; + qdisc_skb_cb(skb)->pkt_segs = gso_segs; } qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } @@ -4738,7 +4745,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_update_prio(skb); - qdisc_pkt_len_init(skb); + qdisc_pkt_len_segs_init(skb); tcx_set_ingress(skb, false); #ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 32bacfc314c26..9213129f0de10 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1406,7 +1406,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) if (!shinfo->gso_size) return cake_calc_overhead(q, len, off); - /* borrowed from 
qdisc_pkt_len_init() */ + /* borrowed from qdisc_pkt_len_segs_init() */ if (!skb->encapsulation) hdr_len = skb_transport_offset(skb); else From 710f9810086e8b3194363ca1f903561463110b51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:53 +0000 Subject: [PATCH 825/867] net: use qdisc_pkt_len_segs_init() in sch_handle_ingress() sch_handle_ingress() sets qdisc_skb_cb(skb)->pkt_len. We also need to initialize qdisc_skb_cb(skb)->pkt_segs. Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 895c3e37e686f..e19eb4e9d77c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4434,7 +4434,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, *pt_prev = NULL; } - qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_pkt_len_segs_init(skb); tcx_set_ingress(skb, true); if (static_branch_unlikely(&tcx_needed_key)) { From a84b801d28a89e246fbd1f93edbfef446b95b433 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:54 +0000 Subject: [PATCH 826/867] net_sched: use qdisc_skb_cb(skb)->pkt_segs in bstats_update() Avoid up to two cache line misses in qdisc dequeue() to fetch skb_shinfo(skb)->gso_segs/gso_size while qdisc spinlock is held. This gives a 5 % improvement in a TX intensive workload. 
Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- include/net/sch_generic.h | 13 ++++++++++--- net/sched/sch_cake.c | 1 + net/sched/sch_dualpi2.c | 1 + net/sched/sch_netem.c | 1 + net/sched/sch_qfq.c | 2 +- net/sched/sch_taprio.c | 1 + net/sched/sch_tbf.c | 1 + 7 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9cd8b5d4b2369..cdf7a58ebcf5e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -829,6 +829,15 @@ static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) return qdisc_skb_cb(skb)->pkt_len; } +static inline unsigned int qdisc_pkt_segs(const struct sk_buff *skb) +{ + u32 pkt_segs = qdisc_skb_cb(skb)->pkt_segs; + + DEBUG_NET_WARN_ON_ONCE(pkt_segs != + (skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1)); + return pkt_segs; +} + /* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */ enum net_xmit_qdisc_t { __NET_XMIT_STOLEN = 0x00010000, @@ -870,9 +879,7 @@ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, static inline void bstats_update(struct gnet_stats_basic_sync *bstats, const struct sk_buff *skb) { - _bstats_update(bstats, - qdisc_pkt_len(skb), - skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1); + _bstats_update(bstats, qdisc_pkt_len(skb), qdisc_pkt_segs(skb)); } static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 9213129f0de10..a20880034aa5e 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1800,6 +1800,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; + qdisc_skb_cb(segs)->pkt_segs = 1; cobalt_set_enqueue_time(segs, now); get_cobalt_cb(segs)->adjusted_len = cake_overhead(q, segs); diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 4b975feb52b1f..6d7e6389758dc 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -475,6 +475,7 @@ static int dualpi2_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, * (3) Enqueue fragment & set ts in dualpi2_enqueue_skb */ qdisc_skb_cb(nskb)->pkt_len = nskb->len; + qdisc_skb_cb(nskb)->pkt_segs = 1; dualpi2_skb_cb(nskb)->classified = dualpi2_skb_cb(skb)->classified; dualpi2_skb_cb(nskb)->ect = dualpi2_skb_cb(skb)->ect; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index eafc316ae319e..32a5f33040461 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -429,6 +429,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *segs; netdev_features_t features = netif_skb_features(skb); + qdisc_skb_cb(skb)->pkt_segs = 1; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 2255355e51d35..d920f57dc6d76 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1250,7 +1250,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } } - gso_segs = skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; + gso_segs = qdisc_pkt_segs(skb); err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 39b735386996e..300d577b32869 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -595,6 +595,7 @@ static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch, skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; + qdisc_skb_cb(segs)->pkt_segs = 1; slen += segs->len; /* FIXME: we should be segmenting to a smaller size diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 4c977f049670a..f2340164f579a 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -221,6 +221,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, skb_mark_not_on_list(segs); seg_len = segs->len; qdisc_skb_cb(segs)->pkt_len = seg_len; + qdisc_skb_cb(segs)->pkt_segs = 1; ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) From 23da358d80dae743d0f21373fa03e3a5e7b5c7e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:55 +0000 Subject: [PATCH 827/867] net_sched: cake: use qdisc_pkt_segs() Use new qdisc_pkt_segs() to avoid a cache line miss in cake_enqueue() for non GSO packets. cake_overhead() does not have to recompute it. 
Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/sched/sch_cake.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a20880034aa5e..5948a149129c6 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1398,12 +1398,12 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) const struct skb_shared_info *shinfo = skb_shinfo(skb); unsigned int hdr_len, last_len = 0; u32 off = skb_network_offset(skb); + u16 segs = qdisc_pkt_segs(skb); u32 len = qdisc_pkt_len(skb); - u16 segs = 1; q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); - if (!shinfo->gso_size) + if (segs == 1) return cake_calc_overhead(q, len, off); /* borrowed from qdisc_pkt_len_segs_init() */ @@ -1430,12 +1430,6 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) hdr_len += sizeof(struct udphdr); } - if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) - segs = DIV_ROUND_UP(skb->len - hdr_len, - shinfo->gso_size); - else - segs = shinfo->gso_segs; - len = shinfo->gso_size + hdr_len; last_len = skb->len - shinfo->gso_size * (segs - 1); @@ -1788,7 +1782,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(len > b->max_skblen)) b->max_skblen = len; - if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { + if (qdisc_pkt_segs(skb) > 1 && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); unsigned int slen = 0, numsegs = 0; From 748b28688e5cb7f89e8b6ef8edb797b22973c604 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:56 +0000 Subject: [PATCH 828/867] net_sched: add Qdisc_read_mostly and Qdisc_write groups It is possible to reorg Qdisc to avoid always dirtying 2 cache lines in fast path by reducing this to a single dirtied cache line. 
In current layout, we change only four/six fields in the first cache line: - q.spinlock - q.qlen - bstats.bytes - bstats.packets - some Qdisc also change q.next/q.prev In the second cache line we change in the fast path: - running - state - qstats.backlog /* --- cacheline 2 boundary (128 bytes) --- */ struct sk_buff_head gso_skb __attribute__((__aligned__(64))); /* 0x80 0x18 */ struct qdisc_skb_head q; /* 0x98 0x18 */ struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /* 0xb0 0x10 */ /* --- cacheline 3 boundary (192 bytes) --- */ struct gnet_stats_queue qstats; /* 0xc0 0x14 */ bool running; /* 0xd4 0x1 */ /* XXX 3 bytes hole, try to pack */ unsigned long state; /* 0xd8 0x8 */ struct Qdisc * next_sched; /* 0xe0 0x8 */ struct sk_buff_head skb_bad_txq; /* 0xe8 0x18 */ /* --- cacheline 4 boundary (256 bytes) --- */ Reorganize things to have a first cache line mostly read, then a mostly written one. This gives a ~3% increase of performance under tx stress. Note that there is an additional hole because @qstats now spans over a third cache line. 
/* --- cacheline 2 boundary (128 bytes) --- */ __u8 __cacheline_group_begin__Qdisc_read_mostly[0] __attribute__((__aligned__(64))); /* 0x80 0 */ struct sk_buff_head gso_skb; /* 0x80 0x18 */ struct Qdisc * next_sched; /* 0x98 0x8 */ struct sk_buff_head skb_bad_txq; /* 0xa0 0x18 */ __u8 __cacheline_group_end__Qdisc_read_mostly[0]; /* 0xb8 0 */ /* XXX 8 bytes hole, try to pack */ /* --- cacheline 3 boundary (192 bytes) --- */ __u8 __cacheline_group_begin__Qdisc_write[0] __attribute__((__aligned__(64))); /* 0xc0 0 */ struct qdisc_skb_head q; /* 0xc0 0x18 */ unsigned long state; /* 0xd8 0x8 */ struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /* 0xe0 0x10 */ bool running; /* 0xf0 0x1 */ /* XXX 3 bytes hole, try to pack */ struct gnet_stats_queue qstats; /* 0xf4 0x14 */ /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */ __u8 __cacheline_group_end__Qdisc_write[0]; /* 0x108 0 */ /* XXX 56 bytes hole, try to pack */ Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- include/net/sch_generic.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index cdf7a58ebcf5e..79501499dafba 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -103,17 +103,24 @@ struct Qdisc { int pad; refcount_t refcnt; - /* - * For performance sake on SMP, we put highly modified fields at the end - */ - struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; - struct qdisc_skb_head q; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - bool running; /* must be written under qdisc spinlock */ - unsigned long state; - struct Qdisc *next_sched; - struct sk_buff_head skb_bad_txq; + /* Cache line potentially dirtied in dequeue() or __netif_reschedule(). 
*/ + __cacheline_group_begin(Qdisc_read_mostly) ____cacheline_aligned; + struct sk_buff_head gso_skb; + struct Qdisc *next_sched; + struct sk_buff_head skb_bad_txq; + __cacheline_group_end(Qdisc_read_mostly); + + /* Fields dirtied in dequeue() fast path. */ + __cacheline_group_begin(Qdisc_write) ____cacheline_aligned; + struct qdisc_skb_head q; + unsigned long state; + struct gnet_stats_basic_sync bstats; + bool running; /* must be written under qdisc spinlock */ + + /* Note : we only change qstats.backlog in fast path. */ + struct gnet_stats_queue qstats; + __cacheline_group_end(Qdisc_write); + atomic_long_t defer_count ____cacheline_aligned_in_smp; struct llist_head defer_list; From 801c087279d24744e43bdb4d70c52d2f7d6ff574 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:57 +0000 Subject: [PATCH 829/867] net_sched: sch_fq: move qdisc_bstats_update() to fq_dequeue_skb() Group together changes to qdisc fields to reduce chances of false sharing if another cpu attempts to acquire the qdisc spinlock. 
qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/sched/sch_fq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index fee922da2f99c..0b0ca1aa9251f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -497,6 +497,7 @@ static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, skb_mark_not_on_list(skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; + qdisc_bstats_update(sch, skb); } static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) @@ -776,7 +777,6 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) f->time_next_packet = now + len; } out: - qdisc_bstats_update(sch, skb); return skb; } From 1fb06154f0eceeb97f56d3e4b94df42e8d302e52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:58 +0000 Subject: [PATCH 830/867] net_sched: sch_fq: prefetch one skb ahead in dequeue() prefetch the skb that we are likely to dequeue at the next dequeue(). Also call fq_dequeue_skb() a bit sooner in fq_dequeue(). This reduces the window between read of q.qlen and changes of fields in the cache line that could be dirtied by another cpu trying to queue a packet. 
Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/sched/sch_fq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 0b0ca1aa9251f..6e5f2f4f24154 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -480,7 +480,10 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, struct sk_buff *skb) { if (skb == flow->head) { - flow->head = skb->next; + struct sk_buff *next = skb->next; + + prefetch(next); + flow->head = next; } else { rb_erase(&skb->rbnode, &flow->t_root); skb->dev = qdisc_dev(sch); @@ -712,6 +715,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) goto begin; } prefetch(&skb->end); + fq_dequeue_skb(sch, f, skb); if ((s64)(now - time_next_packet - q->ce_threshold) > 0) { INET_ECN_set_ce(skb); q->stat_ce_mark++; @@ -719,7 +723,6 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) if (--f->qlen == 0) q->inactive_flows++; q->band_pkt_count[fq_skb_cb(skb)->band]--; - fq_dequeue_skb(sch, f, skb); } else { head->first = f->next; /* force a pass through old_flows to prevent starvation */ From 583021cc478cea1568217d6335ffa304ea339ad8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:31:59 +0000 Subject: [PATCH 831/867] net: prefetch skb->priority in __dev_xmit_skb() Most qdiscs need to read skb->priority at enqueue() time. In commit 100dfa74cad9 ("net: dev_queue_xmit() llist adoption") I added a prefetch(next), let's add another one for the second half of skb. Note that skb->priority and skb->hash share a common cache line, so this patch helps qdiscs needing both fields. 
Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index e19eb4e9d77c2..53e2496dc4292 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4246,6 +4246,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, llist_for_each_entry_safe(skb, next, ll_list, ll_node) { prefetch(next); + prefetch(&next->priority); skb_mark_not_on_list(skb); rc = dev_qdisc_enqueue(skb, q, &to_free, txq); count++; From 5ff246fe75175e888c6c9fd61becdfa68790fba6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:32:00 +0000 Subject: [PATCH 832/867] net: annotate a data-race in __dev_xmit_skb() q->limit is read locklessly, add a READ_ONCE(). Fixes: 100dfa74cad9 ("net: dev_queue_xmit() llist adoption") Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 53e2496dc4292..10042139dbb05 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4194,7 +4194,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, do { if (first_n && !defer_count) { defer_count = atomic_long_inc_return(&q->defer_count); - if (unlikely(defer_count > q->limit)) { + if (unlikely(defer_count > READ_ONCE(q->limit))) { kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP); return NET_XMIT_DROP; } From 6556065d914ba2df60d5c760957e235ca36b6d74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:32:01 +0000 Subject: [PATCH 833/867] net_sched: add tcf_kfree_skb_list() helper Using kfree_skb_list_reason() to free list of skbs from qdisc operations seems wrong as each skb might have a different drop reason. Cleanup __dev_xmit_skb() to call tcf_kfree_skb_list() once in preparation of the following patch. 
Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- include/net/sch_generic.h | 11 +++++++++++ net/core/dev.c | 15 +++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 79501499dafba..b8092d0378a0c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1105,6 +1105,17 @@ static inline void tcf_set_drop_reason(const struct sk_buff *skb, tc_skb_cb(skb)->drop_reason = reason; } +static inline void tcf_kfree_skb_list(struct sk_buff *skb) +{ + while (unlikely(skb)) { + struct sk_buff *next = skb->next; + + prefetch(next); + kfree_skb_reason(skb, tcf_get_drop_reason(skb)); + skb = next; + } +} + /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/net/core/dev.c b/net/core/dev.c index 10042139dbb05..e865cdb9b6966 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4162,7 +4162,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, __qdisc_run(q); qdisc_run_end(q); - goto no_lock_out; + goto free_skbs; } qdisc_bstats_cpu_update(q, skb); @@ -4176,12 +4176,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); - -no_lock_out: - if (unlikely(to_free)) - kfree_skb_list_reason(to_free, - tcf_get_drop_reason(to_free)); - return rc; + goto free_skbs; } /* Open code llist_add(&skb->ll_node, &q->defer_list) + queue limit. 
@@ -4257,9 +4252,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, } unlock: spin_unlock(root_lock); - if (unlikely(to_free)) - kfree_skb_list_reason(to_free, - tcf_get_drop_reason(to_free)); + +free_skbs: + tcf_kfree_skb_list(to_free); return rc; } From 143e3449bdd8d490443fa0a30945274a79310de2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:32:02 +0000 Subject: [PATCH 834/867] net_sched: add qdisc_dequeue_drop() helper Some qdiscs like cake, codel, fq_codel might drop packets in their dequeue() method. This is currently problematic because dequeue() runs with the qdisc spinlock held. Freeing skbs can be extremely expensive. Add qdisc_dequeue_drop() method and a new TCQ_F_DEQUEUE_DROPS so that these qdiscs can opt-in to defer the skb frees after the qdisc spinlock is released. TCQ_F_DEQUEUE_DROPS is an attempt to not penalize other qdiscs with an extra cache line miss. Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- include/net/pkt_sched.h | 5 +++-- include/net/sch_generic.h | 30 +++++++++++++++++++++++++++--- net/core/dev.c | 22 +++++++++++++--------- 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 4678db45832a1..e703c507d0daa 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,12 +114,13 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, void __qdisc_run(struct Qdisc *q); -static inline void qdisc_run(struct Qdisc *q) +static inline struct sk_buff *qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { __qdisc_run(q); - qdisc_run_end(q); + return qdisc_run_end(q); } + return NULL; } extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b8092d0378a0c..c3a7268b567e0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -88,6 +88,8 @@ struct Qdisc { #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump 
*/ #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ +#define TCQ_F_DEQUEUE_DROPS 0x400 /* ->dequeue() can drop packets in q->to_free */ + u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; @@ -119,6 +121,8 @@ struct Qdisc { /* Note : we only change qstats.backlog in fast path. */ struct gnet_stats_queue qstats; + + struct sk_buff *to_free; __cacheline_group_end(Qdisc_write); @@ -218,8 +222,10 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) return true; } -static inline void qdisc_run_end(struct Qdisc *qdisc) +static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc) { + struct sk_buff *to_free = NULL; + if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); @@ -232,9 +238,16 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) __netif_schedule(qdisc); - } else { - WRITE_ONCE(qdisc->running, false); + return NULL; + } + + if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) { + to_free = qdisc->to_free; + if (to_free) + qdisc->to_free = NULL; } + WRITE_ONCE(qdisc->running, false); + return to_free; } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) @@ -1116,6 +1129,17 @@ static inline void tcf_kfree_skb_list(struct sk_buff *skb) } } +static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb, + enum skb_drop_reason reason) +{ + DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS)); + DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK); + + tcf_set_drop_reason(skb, reason); + skb->next = q->to_free; + q->to_free = skb; +} + /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/net/core/dev.c b/net/core/dev.c index e865cdb9b6966..9094c0fb8c689 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4141,7 +4141,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct 
Qdisc *q, struct net_device *dev, struct netdev_queue *txq) { - struct sk_buff *next, *to_free = NULL; + struct sk_buff *next, *to_free = NULL, *to_free2 = NULL; spinlock_t *root_lock = qdisc_lock(q); struct llist_node *ll_list, *first_n; unsigned long defer_count = 0; @@ -4160,7 +4160,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (unlikely(!nolock_qdisc_is_empty(q))) { rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); - qdisc_run_end(q); + to_free2 = qdisc_run_end(q); goto free_skbs; } @@ -4170,12 +4170,13 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, !nolock_qdisc_is_empty(q)) __qdisc_run(q); - qdisc_run_end(q); - return NET_XMIT_SUCCESS; + to_free2 = qdisc_run_end(q); + rc = NET_XMIT_SUCCESS; + goto free_skbs; } rc = dev_qdisc_enqueue(skb, q, &to_free, txq); - qdisc_run(q); + to_free2 = qdisc_run(q); goto free_skbs; } @@ -4234,7 +4235,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) __qdisc_run(q); - qdisc_run_end(q); + to_free2 = qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { int count = 0; @@ -4246,7 +4247,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = dev_qdisc_enqueue(skb, q, &to_free, txq); count++; } - qdisc_run(q); + to_free2 = qdisc_run(q); if (count != 1) rc = NET_XMIT_SUCCESS; } @@ -4255,6 +4256,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, free_skbs: tcf_kfree_skb_list(to_free); + tcf_kfree_skb_list(to_free2); return rc; } @@ -5747,8 +5749,9 @@ static __latent_entropy void net_tx_action(void) rcu_read_lock(); while (head) { - struct Qdisc *q = head; spinlock_t *root_lock = NULL; + struct sk_buff *to_free; + struct Qdisc *q = head; head = head->next_sched; @@ -5775,9 +5778,10 @@ static __latent_entropy void net_tx_action(void) } clear_bit(__QDISC_STATE_SCHED, &q->state); - qdisc_run(q); + to_free = 
qdisc_run(q); if (root_lock) spin_unlock(root_lock); + tcf_kfree_skb_list(to_free); } rcu_read_unlock(); From e9f213ae3992a74c782d38e643478537fbd353c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 09:32:03 +0000 Subject: [PATCH 835/867] net_sched: use qdisc_dequeue_drop() in cake, codel, fq_codel cake, codel and fq_codel can drop many packets from dequeue(). Use qdisc_dequeue_drop() so that the freeing can happen outside of the qdisc spinlock scope. Add TCQ_F_DEQUEUE_DROPS to sch->flags. Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/sched/sch_cake.c | 4 +++- net/sched/sch_codel.c | 4 +++- net/sched/sch_fq_codel.c | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 5948a149129c6..0ea9440f68c60 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2183,7 +2183,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) b->tin_dropped++; qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); - kfree_skb_reason(skb, reason); + qdisc_dequeue_drop(sch, skb, reason); if (q->rate_flags & CAKE_FLAG_INGRESS) goto retry; } @@ -2724,6 +2724,8 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, int i, j, err; sch->limit = 10240; + sch->flags |= TCQ_F_DEQUEUE_DROPS; + q->tin_mode = CAKE_DIFFSERV_DIFFSERV3; q->flow_mode = CAKE_FLOW_TRIPLE; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index fa0314679e434..c6551578f1cf8 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -52,7 +52,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } @@ -182,6 +182,8 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt, else sch->flags &= ~TCQ_F_CAN_BYPASS; + sch->flags |= TCQ_F_DEQUEUE_DROPS; + return 0; } diff --git 
a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a141423929394..dc187c7f06b10 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -275,7 +275,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } @@ -519,6 +519,9 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, sch->flags |= TCQ_F_CAN_BYPASS; else sch->flags &= ~TCQ_F_CAN_BYPASS; + + sch->flags |= TCQ_F_DEQUEUE_DROPS; + return 0; alloc_failure: From 788e73c954ed8f6e0fd28848f39a546dec814206 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 11 Nov 2025 10:07:26 +0000 Subject: [PATCH 836/867] net: stmmac: Add generic suspend/resume helper for PCI-based controllers Most glue driver for PCI-based DWMAC controllers utilize similar platform suspend/resume routines. Add a generic implementation to reduce duplicated code. Signed-off-by: Yao Zi Reviewed-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 8 ++++ drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../ethernet/stmicro/stmmac/stmmac_libpci.c | 48 +++++++++++++++++++ .../ethernet/stmicro/stmmac/stmmac_libpci.h | 12 +++++ 4 files changed, 69 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.c create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.h diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 87c5bea6c2a24..1350f16f7138d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -349,6 +349,14 @@ config DWMAC_VISCONTI endif +config STMMAC_LIBPCI + tristate "STMMAC PCI helper library" + depends on PCI + default y + help + This selects the PCI bus helpers for the stmmac driver. 
If you + have a controller with PCI interface, say Y or M here. + config DWMAC_INTEL tristate "Intel GMAC support" default X86 diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 1681a8a283135..7bf528731034b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_DWMAC_VISCONTI) += dwmac-visconti.o stmmac-platform-objs:= stmmac_platform.o dwmac-altr-socfpga-objs := dwmac-socfpga.o +obj-$(CONFIG_STMMAC_LIBPCI) += stmmac_libpci.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o obj-$(CONFIG_DWMAC_LOONGSON) += dwmac-loongson.o diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.c new file mode 100644 index 0000000000000..5c5dd502f79a6 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PCI bus helpers for STMMAC driver + * Copyright (C) 2025 Yao Zi + */ + +#include +#include + +#include "stmmac_libpci.h" + +int stmmac_pci_plat_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + + return 0; +} +EXPORT_SYMBOL_GPL(stmmac_pci_plat_suspend); + +int stmmac_pci_plat_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} +EXPORT_SYMBOL_GPL(stmmac_pci_plat_resume); + +MODULE_DESCRIPTION("STMMAC PCI helper library"); +MODULE_AUTHOR("Yao Zi "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.h 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.h new file mode 100644 index 0000000000000..71553184f9823 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_libpci.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Yao Zi + */ + +#ifndef __STMMAC_LIBPCI_H__ +#define __STMMAC_LIBPCI_H__ + +int stmmac_pci_plat_suspend(struct device *dev, void *bsp_priv); +int stmmac_pci_plat_resume(struct device *dev, void *bsp_priv); + +#endif /* __STMMAC_LIBPCI_H__ */ From 04e16a188080cfb95cc303fd88909c370143d848 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 11 Nov 2025 10:07:27 +0000 Subject: [PATCH 837/867] net: stmmac: loongson: Use generic PCI suspend/resume routines Convert glue driver for Loongson DWMAC controller to use the generic platform suspend/resume routines for PCI controllers, instead of implementing its own one. Signed-off-by: Yao Zi Reviewed-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 6 +++- .../ethernet/stmicro/stmmac/dwmac-loongson.c | 36 ++----------------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 1350f16f7138d..d2bff28fe4097 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -367,15 +367,19 @@ config DWMAC_INTEL This selects the Intel platform specific bus support for the stmmac driver. This driver is used for Intel Quark/EHL/TGL. +if STMMAC_LIBPCI + config DWMAC_LOONGSON tristate "Loongson PCI DWMAC support" default MACH_LOONGSON64 - depends on (MACH_LOONGSON64 || COMPILE_TEST) && STMMAC_ETH && PCI + depends on MACH_LOONGSON64 || COMPILE_TEST depends on COMMON_CLK help This selects the LOONGSON PCI bus support for the stmmac driver, Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge. 
+endif + config STMMAC_PCI tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 2a3ac0136cdbc..584dc4ff83209 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -8,6 +8,7 @@ #include #include #include "stmmac.h" +#include "stmmac_libpci.h" #include "dwmac_dma.h" #include "dwmac1000.h" @@ -525,37 +526,6 @@ static int loongson_dwmac_fix_reset(struct stmmac_priv *priv, void __iomem *ioad 10000, 2000000); } -static int loongson_dwmac_suspend(struct device *dev, void *bsp_priv) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, true); - return 0; -} - -static int loongson_dwmac_resume(struct device *dev, void *bsp_priv) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return 0; -} - static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct plat_stmmacenet_data *plat; @@ -600,8 +570,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->bsp_priv = ld; plat->setup = loongson_dwmac_setup; plat->fix_soc_reset = loongson_dwmac_fix_reset; - plat->suspend = loongson_dwmac_suspend; - plat->resume = loongson_dwmac_resume; + plat->suspend = stmmac_pci_plat_suspend; + plat->resume = stmmac_pci_plat_resume; ld->dev = &pdev->dev; ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; From b2e5a60c4b9633475d33f49bae29b616aa251084 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 11 Nov 2025 10:11:58 +0000 Subject: [PATCH 838/867] net: stmmac: pci: Use generic PCI suspend/resume routines Convert STMMAC PCI glue driver to use 
the generic platform suspend/resume routines for PCI controllers, instead of implementing its own one. Signed-off-by: Yao Zi Reviewed-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 6 ++-- .../net/ethernet/stmicro/stmmac/stmmac_pci.c | 36 ++----------------- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index d2bff28fe4097..00df980fd4e0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -378,11 +378,8 @@ config DWMAC_LOONGSON This selects the LOONGSON PCI bus support for the stmmac driver, Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge. -endif - config STMMAC_PCI tristate "STMMAC PCI bus support" - depends on STMMAC_ETH && PCI depends on COMMON_CLK help This selects the platform specific bus support for the stmmac driver. @@ -392,4 +389,7 @@ config STMMAC_PCI If you have a controller with this interface, say Y or M here. If unsure, say N. 
+ +endif + endif diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 94b3a3b272706..fa92be672c548 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -14,6 +14,7 @@ #include #include "stmmac.h" +#include "stmmac_libpci.h" struct stmmac_pci_info { int (*setup)(struct pci_dev *pdev, struct plat_stmmacenet_data *plat); @@ -139,37 +140,6 @@ static const struct stmmac_pci_info snps_gmac5_pci_info = { .setup = snps_gmac5_default_data, }; -static int stmmac_pci_suspend(struct device *dev, void *bsp_priv) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, true); - return 0; -} - -static int stmmac_pci_resume(struct device *dev, void *bsp_priv) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return 0; -} - /** * stmmac_pci_probe * @@ -249,8 +219,8 @@ static int stmmac_pci_probe(struct pci_dev *pdev, plat->safety_feat_cfg->prtyen = 1; plat->safety_feat_cfg->tmouten = 1; - plat->suspend = stmmac_pci_suspend; - plat->resume = stmmac_pci_resume; + plat->suspend = stmmac_pci_plat_suspend; + plat->resume = stmmac_pci_plat_resume; return stmmac_dvr_probe(&pdev->dev, plat, &res); } From e26f9e0229a910859b4065a1766493f8a4b952d8 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 11 Nov 2025 15:45:18 +0530 Subject: [PATCH 839/867] net: ti: icssg-prueth: Add functions to create and destroy Rx/Tx queues Each port for a given ICSSG instance has their own set of Tx and Rx queues. Add functions to create and destroy these queues, which will be further used while performing ndo_bpf operations to set up XSK Tx/Rx queues for a given port. 
In the destroy Rx queue sequence add teardown wait to ensure that all the descriptors including the TDCM (teardown completion marker) have been serviced and freed to avoid any sort of descriptor leaks. Reviewed-by: Jacob Keller Signed-off-by: Meghana Malladi Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/icssg/icssg_common.c | 10 +- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 177 ++++++++++++------- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 127 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 0eed29d6187a6..94021751b6b78 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -719,8 +719,10 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) return ret; } - if (cppi5_desc_is_tdcm(desc_dma)) /* Teardown ? */ + if (cppi5_desc_is_tdcm(desc_dma)) { + complete(&emac->tdown_complete); return 0; + } desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); @@ -804,7 +806,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) return ret; } -static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) +void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_rx_chn *rx_chn = data; struct cppi5_host_desc_t *desc_rx; @@ -822,6 +824,7 @@ static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); } +EXPORT_SYMBOL_GPL(prueth_rx_cleanup); static int prueth_tx_ts_cookie_get(struct prueth_emac *emac) { @@ -1025,7 +1028,7 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev } EXPORT_SYMBOL_GPL(icssg_ndo_start_xmit); -static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) +void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_tx_chn *tx_chn = data; struct 
cppi5_host_desc_t *desc_tx; @@ -1051,6 +1054,7 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) prueth_xmit_free(tx_chn, desc_tx); } +EXPORT_SYMBOL_GPL(prueth_tx_cleanup); irqreturn_t prueth_rx_irq(int irq, void *dev_id) { diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 57a7d1ceab088..b66ffbfb499c7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -735,6 +735,114 @@ static int icssg_update_vlan_mcast(struct net_device *vdev, int vid, return 0; } +static void prueth_destroy_txq(struct prueth_emac *emac) +{ + int ret, i; + + atomic_set(&emac->tdown_cnt, emac->tx_ch_num); + /* ensure new tdown_cnt value is visible */ + smp_mb__after_atomic(); + /* tear down and disable UDMA channels */ + reinit_completion(&emac->tdown_complete); + for (i = 0; i < emac->tx_ch_num; i++) + k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); + + ret = wait_for_completion_timeout(&emac->tdown_complete, + msecs_to_jiffies(1000)); + if (!ret) + netdev_err(emac->ndev, "tx teardown timeout\n"); + + for (i = 0; i < emac->tx_ch_num; i++) { + napi_disable(&emac->tx_chns[i].napi_tx); + hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); + k3_udma_glue_reset_tx_chn(emac->tx_chns[i].tx_chn, + &emac->tx_chns[i], + prueth_tx_cleanup); + k3_udma_glue_disable_tx_chn(emac->tx_chns[i].tx_chn); + } +} + +static void prueth_destroy_rxq(struct prueth_emac *emac) +{ + int i, ret; + + /* tear down and disable UDMA channels */ + reinit_completion(&emac->tdown_complete); + k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); + + /* When RX DMA Channel Teardown is initiated, it will result in an + * interrupt and a Teardown Completion Marker (TDCM) is queued into + * the RX Completion queue. Acknowledging the interrupt involves + * popping the TDCM descriptor from the RX Completion queue via the + * RX NAPI Handler. 
To avoid timing out when waiting for the TDCM to + * be popped, schedule the RX NAPI handler to run immediately. + */ + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) { + if (napi_schedule_prep(&emac->napi_rx)) + __napi_schedule(&emac->napi_rx); + } + + ret = wait_for_completion_timeout(&emac->tdown_complete, + msecs_to_jiffies(1000)); + if (!ret) + netdev_err(emac->ndev, "rx teardown timeout\n"); + + for (i = 0; i < PRUETH_MAX_RX_FLOWS; i++) { + napi_disable(&emac->napi_rx); + hrtimer_cancel(&emac->rx_hrtimer); + k3_udma_glue_reset_rx_chn(emac->rx_chns.rx_chn, i, + &emac->rx_chns, + prueth_rx_cleanup); + } + + prueth_destroy_xdp_rxqs(emac); + k3_udma_glue_disable_rx_chn(emac->rx_chns.rx_chn); +} + +static int prueth_create_txq(struct prueth_emac *emac) +{ + int ret, i; + + for (i = 0; i < emac->tx_ch_num; i++) { + ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); + if (ret) + goto reset_tx_chan; + napi_enable(&emac->tx_chns[i].napi_tx); + } + return 0; + +reset_tx_chan: + /* Since interface is not yet up, there wouldn't be + * any SKB for completion.
So set false to free_skb + */ + prueth_reset_tx_chan(emac, i, false); + return ret; +} + +static int prueth_create_rxq(struct prueth_emac *emac) +{ + int ret; + + ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); + if (ret) + return ret; + + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + if (ret) + goto reset_rx_chn; + + ret = prueth_create_xdp_rxqs(emac); + if (ret) + goto reset_rx_chn; + + napi_enable(&emac->napi_rx); + return 0; + +reset_rx_chn: + prueth_reset_rx_chan(&emac->rx_chns, PRUETH_MAX_RX_FLOWS, false); + return ret; +} + /** * emac_ndo_open - EMAC device open * @ndev: network adapter device @@ -746,7 +854,7 @@ static int icssg_update_vlan_mcast(struct net_device *vdev, int vid, static int emac_ndo_open(struct net_device *ndev) { struct prueth_emac *emac = netdev_priv(ndev); - int ret, i, num_data_chn = emac->tx_ch_num; + int ret, num_data_chn = emac->tx_ch_num; struct icssg_flow_cfg __iomem *flow_cfg; struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); @@ -819,28 +927,13 @@ static int emac_ndo_open(struct net_device *ndev) goto stop; /* Prepare RX */ - ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); + ret = prueth_create_rxq(emac); if (ret) goto free_tx_ts_irq; - ret = prueth_create_xdp_rxqs(emac); - if (ret) - goto reset_rx_chn; - - ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + ret = prueth_create_txq(emac); if (ret) - goto destroy_xdp_rxqs; - - for (i = 0; i < emac->tx_ch_num; i++) { - ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); - if (ret) - goto reset_tx_chan; - } - - /* Enable NAPI in Tx and Rx direction */ - for (i = 0; i < emac->tx_ch_num; i++) - napi_enable(&emac->tx_chns[i].napi_tx); - napi_enable(&emac->napi_rx); + goto destroy_rxq; /* start PHY */ phy_start(ndev->phydev); @@ -851,15 +944,8 @@ static int emac_ndo_open(struct net_device *ndev) return 0; -reset_tx_chan: - /* Since interface is not yet up, there is wouldn't be - * any SKB 
for completion. So set false to free_skb - */ - prueth_reset_tx_chan(emac, i, false); -destroy_xdp_rxqs: - prueth_destroy_xdp_rxqs(emac); -reset_rx_chn: - prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false); +destroy_rxq: + prueth_destroy_rxq(emac); free_tx_ts_irq: free_irq(emac->tx_ts_irq, emac); stop: @@ -889,9 +975,6 @@ static int emac_ndo_stop(struct net_device *ndev) { struct prueth_emac *emac = netdev_priv(ndev); struct prueth *prueth = emac->prueth; - int rx_flow = PRUETH_RX_FLOW_DATA; - int max_rx_flows; - int ret, i; /* inform the upper layers. */ netif_tx_stop_all_queues(ndev); @@ -905,32 +988,8 @@ static int emac_ndo_stop(struct net_device *ndev) else __dev_mc_unsync(ndev, icssg_prueth_del_mcast); - atomic_set(&emac->tdown_cnt, emac->tx_ch_num); - /* ensure new tdown_cnt value is visible */ - smp_mb__after_atomic(); - /* tear down and disable UDMA channels */ - reinit_completion(&emac->tdown_complete); - for (i = 0; i < emac->tx_ch_num; i++) - k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); - - ret = wait_for_completion_timeout(&emac->tdown_complete, - msecs_to_jiffies(1000)); - if (!ret) - netdev_err(ndev, "tx teardown timeout\n"); - - prueth_reset_tx_chan(emac, emac->tx_ch_num, true); - for (i = 0; i < emac->tx_ch_num; i++) { - napi_disable(&emac->tx_chns[i].napi_tx); - hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); - } - - max_rx_flows = PRUETH_MAX_RX_FLOWS; - k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); - - prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); - prueth_destroy_xdp_rxqs(emac); - napi_disable(&emac->napi_rx); - hrtimer_cancel(&emac->rx_hrtimer); + prueth_destroy_txq(emac); + prueth_destroy_rxq(emac); cancel_work_sync(&emac->rx_mode_work); @@ -943,10 +1002,10 @@ static int emac_ndo_stop(struct net_device *ndev) free_irq(emac->tx_ts_irq, emac); - free_irq(emac->rx_chns.irq[rx_flow], emac); + free_irq(emac->rx_chns.irq[PRUETH_RX_FLOW_DATA], emac); prueth_ndev_del_tx_napi(emac, emac->tx_ch_num); - 
prueth_cleanup_rx_chns(emac, &emac->rx_chns, max_rx_flows); + prueth_cleanup_rx_chns(emac, &emac->rx_chns, PRUETH_MAX_RX_FLOWS); prueth_cleanup_tx_chns(emac); prueth->emacs_initialized--; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index f0fa9688d9a08..5cc90b66035a1 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -501,5 +501,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct xdp_frame *xdpf, struct page *page, unsigned int q_idx); +void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); +void prueth_tx_cleanup(void *data, dma_addr_t desc_dma); #endif /* __NET_TI_ICSSG_PRUETH_H */ From c4665b2433d4fc8a6d5478aa9cd18ebca300bac7 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 11 Nov 2025 15:45:19 +0530 Subject: [PATCH 840/867] net: ti: icssg-prueth: Add XSK pool helpers Implement XSK NDOs (setup, wakeup) and create XSK Rx and Tx queues. xsk_qid stores the queue id for a given port which has been registered for zero copy AF_XDP and used to acquire UMEM pointer if registered. Based on the xsk_qid and the xsk_pool (umem) the driver is either in copy or zero copy mode. In case of copy mode the xsk_qid value will be invalid and will be set to valid queue id when enabling zero copy. To enable zero copy, the Rx queues are destroyed, i.e., descriptors pushed to fq and cq are freed to remap them to xdp buffers from the umem. 
Reviewed-by: Jacob Keller Signed-off-by: Meghana Malladi Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/icssg/icssg_common.c | 2 +- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 138 +++++++++++++++++++ drivers/net/ethernet/ti/icssg/icssg_prueth.h | 10 ++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 94021751b6b78..cc52cff70d7e5 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -754,7 +754,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) } pa = page_address(page); - if (emac->xdp_prog) { + if (prueth_xdp_is_enabled(emac)) { xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq); xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index b66ffbfb499c7..e4c3b6b152ea6 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -47,6 +47,9 @@ NETIF_F_HW_HSR_TAG_INS | \ NETIF_F_HW_HSR_TAG_RM) +#define PRUETH_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC |\ + DMA_ATTR_WEAK_ORDERING) + /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) @@ -735,6 +738,20 @@ static int icssg_update_vlan_mcast(struct net_device *vdev, int vid, return 0; } +static void prueth_set_xsk_pool(struct prueth_emac *emac, u16 queue_id) +{ + struct prueth_tx_chn *tx_chn = &emac->tx_chns[queue_id]; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + + if (emac->xsk_qid != queue_id) { + rx_chn->xsk_pool = NULL; + tx_chn->xsk_pool = NULL; + } else { + rx_chn->xsk_pool = xsk_get_pool_from_qid(emac->ndev, queue_id); + tx_chn->xsk_pool = xsk_get_pool_from_qid(emac->ndev, queue_id); + } +} + static void prueth_destroy_txq(struct prueth_emac *emac) { int ret, i; @@ -875,6 +892,7 @@ static int emac_ndo_open(struct 
net_device *ndev) return ret; } + emac->xsk_qid = -EINVAL; init_completion(&emac->cmd_complete); ret = prueth_init_tx_chns(emac); if (ret) { @@ -1200,6 +1218,109 @@ static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) return 0; } +static int prueth_xsk_pool_enable(struct prueth_emac *emac, + struct xsk_buff_pool *pool, u16 queue_id) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + u32 frame_size; + int ret; + + if (queue_id >= PRUETH_MAX_RX_FLOWS || + queue_id >= emac->tx_ch_num) { + netdev_err(emac->ndev, "Invalid XSK queue ID %d\n", queue_id); + return -EINVAL; + } + + frame_size = xsk_pool_get_rx_frame_size(pool); + if (frame_size < PRUETH_MAX_PKT_SIZE) + return -EOPNOTSUPP; + + ret = xsk_pool_dma_map(pool, rx_chn->dma_dev, PRUETH_RX_DMA_ATTR); + if (ret) { + netdev_err(emac->ndev, "Failed to map XSK pool: %d\n", ret); + return ret; + } + + if (netif_running(emac->ndev)) { + /* stop packets from wire for graceful teardown */ + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_DISABLE); + if (ret) + return ret; + prueth_destroy_rxq(emac); + } + + emac->xsk_qid = queue_id; + prueth_set_xsk_pool(emac, queue_id); + + if (netif_running(emac->ndev)) { + ret = prueth_create_rxq(emac); + if (ret) { + netdev_err(emac->ndev, "Failed to create RX queue: %d\n", ret); + return ret; + } + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_FORWARD); + if (ret) { + prueth_destroy_rxq(emac); + return ret; + } + ret = prueth_xsk_wakeup(emac->ndev, queue_id, XDP_WAKEUP_RX); + if (ret) + return ret; + } + + return 0; +} + +static int prueth_xsk_pool_disable(struct prueth_emac *emac, u16 queue_id) +{ + struct xsk_buff_pool *pool; + int ret; + + if (queue_id >= PRUETH_MAX_RX_FLOWS || + queue_id >= emac->tx_ch_num) { + netdev_err(emac->ndev, "Invalid XSK queue ID %d\n", queue_id); + return -EINVAL; + } + + if (emac->xsk_qid != queue_id) { + netdev_err(emac->ndev, "XSK queue ID %d not registered\n", queue_id); + return -EINVAL; + } + + pool = 
xsk_get_pool_from_qid(emac->ndev, queue_id); + if (!pool) { + netdev_err(emac->ndev, "No XSK pool registered for queue %d\n", queue_id); + return -EINVAL; + } + + if (netif_running(emac->ndev)) { + /* stop packets from wire for graceful teardown */ + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_DISABLE); + if (ret) + return ret; + prueth_destroy_rxq(emac); + } + + xsk_pool_dma_unmap(pool, PRUETH_RX_DMA_ATTR); + emac->xsk_qid = -EINVAL; + prueth_set_xsk_pool(emac, queue_id); + + if (netif_running(emac->ndev)) { + ret = prueth_create_rxq(emac); + if (ret) { + netdev_err(emac->ndev, "Failed to create RX queue: %d\n", ret); + return ret; + } + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_FORWARD); + if (ret) { + prueth_destroy_rxq(emac); + return ret; + } + } + + return 0; +} + /** * emac_ndo_bpf - implements ndo_bpf for icssg_prueth * @ndev: network adapter device @@ -1214,11 +1335,27 @@ static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) switch (bpf->command) { case XDP_SETUP_PROG: return emac_xdp_setup(emac, bpf); + case XDP_SETUP_XSK_POOL: + return bpf->xsk.pool ? 
+ prueth_xsk_pool_enable(emac, bpf->xsk.pool, bpf->xsk.queue_id) : + prueth_xsk_pool_disable(emac, bpf->xsk.queue_id); default: return -EINVAL; } } +int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) +{ + struct prueth_emac *emac = netdev_priv(ndev); + + if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { + netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); + return -EINVAL; + } + + return 0; +} + static const struct net_device_ops emac_netdev_ops = { .ndo_open = emac_ndo_open, .ndo_stop = emac_ndo_stop, @@ -1237,6 +1374,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_xdp_xmit = emac_xdp_xmit, .ndo_hwtstamp_get = icssg_ndo_get_ts_config, .ndo_hwtstamp_set = icssg_ndo_set_ts_config, + .ndo_xsk_wakeup = prueth_xsk_wakeup, }; static int prueth_netdev_init(struct prueth *prueth, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 5cc90b66035a1..a5e3774b03887 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include "icssg_config.h" #include "icss_iep.h" @@ -126,6 +128,7 @@ struct prueth_tx_chn { char name[32]; struct hrtimer tx_hrtimer; unsigned long tx_pace_timeout_ns; + struct xsk_buff_pool *xsk_pool; }; struct prueth_rx_chn { @@ -138,6 +141,7 @@ struct prueth_rx_chn { char name[32]; struct page_pool *pg_pool; struct xdp_rxq_info xdp_rxq; + struct xsk_buff_pool *xsk_pool; }; enum prueth_swdata_type { @@ -241,6 +245,7 @@ struct prueth_emac { struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID]; struct bpf_prog *xdp_prog; struct xdp_attachment_info xdpi; + int xsk_qid; }; /* The buf includes headroom compatible with both skb and xdpf */ @@ -503,5 +508,10 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, unsigned int q_idx); void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); void prueth_tx_cleanup(void *data, dma_addr_t desc_dma); +int 
prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags); +static inline bool prueth_xdp_is_enabled(struct prueth_emac *emac) +{ + return !!READ_ONCE(emac->xdp_prog); +} #endif /* __NET_TI_ICSSG_PRUETH_H */ From 494e20cf54bb82573ff8921282d2f39619b50ebf Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 11 Nov 2025 15:45:20 +0530 Subject: [PATCH 841/867] net: ti: icssg-prueth: Add AF_XDP zero copy for TX Use xsk_pool inside tx_chn to check if a given Tx queue id is registered for xsk zero copy, which gets populated during xsk enable. If xsk_pool is set, get frames from the pool in NAPI context and submit them to the Tx channel. Tx completion is also handled in the NAPI context. Use PRUETH_SWDATA_XSK to recycle xsk buffers back to the umem pool. Add XDP_WAKEUP_TX support to enable xsk_wakeup for Tx. Reviewed-by: Jacob Keller Signed-off-by: Meghana Malladi Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/icssg/icssg_common.c | 112 ++++++++++++++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 13 +++ drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index cc52cff70d7e5..d7469ad457fd7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -93,15 +93,91 @@ void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num) } EXPORT_SYMBOL_GPL(prueth_ndev_del_tx_napi); +static int emac_xsk_xmit_zc(struct prueth_emac *emac, + unsigned int q_idx) +{ + struct prueth_tx_chn *tx_chn = &emac->tx_chns[q_idx]; + struct xsk_buff_pool *pool = tx_chn->xsk_pool; + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *host_desc; + dma_addr_t dma_desc, dma_buf; + struct prueth_swdata *swdata; + struct xdp_desc xdp_desc; + int num_tx = 0, pkt_len; + int descs_avail, ret; + u32 *epib; + int i; + + descs_avail =
k3_cppi_desc_pool_avail(tx_chn->desc_pool); + /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS + * will be available for normal TX path and queue is stopped there if + * necessary + */ + if (descs_avail <= MAX_SKB_FRAGS) + return 0; + + descs_avail -= MAX_SKB_FRAGS; + + for (i = 0; i < descs_avail; i++) { + if (!xsk_tx_peek_desc(pool, &xdp_desc)) + break; + + dma_buf = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + pkt_len = xdp_desc.len; + xsk_buff_raw_dma_sync_for_device(pool, dma_buf, pkt_len); + + host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (unlikely(!host_desc)) + break; + + cppi5_hdesc_init(host_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(host_desc, 0); + epib = host_desc->epib; + epib[0] = 0; + epib[1] = 0; + cppi5_hdesc_set_pktlen(host_desc, pkt_len); + cppi5_desc_set_tags_ids(&host_desc->hdr, 0, + (emac->port_id | (q_idx << 8))); + + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &dma_buf); + cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, + pkt_len); + + swdata = cppi5_hdesc_get_swdata(host_desc); + swdata->type = PRUETH_SWDATA_XSK; + + dma_desc = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, + host_desc); + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, + host_desc, dma_desc); + + if (ret) { + ndev->stats.tx_errors++; + k3_cppi_desc_pool_free(tx_chn->desc_pool, host_desc); + break; + } + + num_tx++; + } + + xsk_tx_release(tx_chn->xsk_pool); + return num_tx; +} + void prueth_xmit_free(struct prueth_tx_chn *tx_chn, struct cppi5_host_desc_t *desc) { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; + struct prueth_swdata *swdata; u32 buf_dma_len; first_desc = desc; next_desc = first_desc; + swdata = cppi5_hdesc_get_swdata(first_desc); + if (swdata->type == PRUETH_SWDATA_XSK) + goto free_pool; cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -126,6 +202,7 
@@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } +free_pool: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } EXPORT_SYMBOL_GPL(prueth_xmit_free); @@ -139,7 +216,9 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, struct prueth_swdata *swdata; struct prueth_tx_chn *tx_chn; unsigned int total_bytes = 0; + int xsk_frames_done = 0; struct xdp_frame *xdpf; + unsigned int pkt_len; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; @@ -176,6 +255,11 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, total_bytes += xdpf->len; xdp_return_frame(xdpf); break; + case PRUETH_SWDATA_XSK: + pkt_len = cppi5_hdesc_get_pktlen(desc_tx); + dev_sw_netstats_tx_add(ndev, 1, pkt_len); + xsk_frames_done++; + break; default: prueth_xmit_free(tx_chn, desc_tx); ndev->stats.tx_dropped++; @@ -204,6 +288,18 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, __netif_tx_unlock(netif_txq); } + if (tx_chn->xsk_pool) { + if (xsk_frames_done) + xsk_tx_completed(tx_chn->xsk_pool, xsk_frames_done); + + if (xsk_uses_need_wakeup(tx_chn->xsk_pool)) + xsk_set_tx_need_wakeup(tx_chn->xsk_pool); + + netif_txq = netdev_get_tx_queue(ndev, chn); + txq_trans_cond_update(netif_txq); + emac_xsk_xmit_zc(emac, chn); + } + return num_tx; } @@ -212,7 +308,10 @@ static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer) struct prueth_tx_chn *tx_chns = container_of(timer, struct prueth_tx_chn, tx_hrtimer); - enable_irq(tx_chns->irq); + if (tx_chns->irq_disabled) { + tx_chns->irq_disabled = false; + enable_irq(tx_chns->irq); + } return HRTIMER_NORESTART; } @@ -235,7 +334,10 @@ static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget) ns_to_ktime(tx_chn->tx_pace_timeout_ns), HRTIMER_MODE_REL_PINNED); } else { - enable_irq(tx_chn->irq); + if (tx_chn->irq_disabled) { + tx_chn->irq_disabled = false; + enable_irq(tx_chn->irq); + } } } @@ -246,6 +348,7 @@ static 
irqreturn_t prueth_tx_irq(int irq, void *dev_id) { struct prueth_tx_chn *tx_chn = dev_id; + tx_chn->irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&tx_chn->napi_tx); @@ -1032,6 +1135,7 @@ void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) { struct prueth_tx_chn *tx_chn = data; struct cppi5_host_desc_t *desc_tx; + struct xsk_buff_pool *xsk_pool; struct prueth_swdata *swdata; struct xdp_frame *xdpf; struct sk_buff *skb; @@ -1048,6 +1152,10 @@ void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) xdpf = swdata->data.xdpf; xdp_return_frame(xdpf); break; + case PRUETH_SWDATA_XSK: + xsk_pool = tx_chn->xsk_pool; + xsk_tx_completed(xsk_pool, 1); + break; default: break; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index e4c3b6b152ea6..bdce5a40defef 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1347,12 +1347,25 @@ static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) { struct prueth_emac *emac = netdev_priv(ndev); + struct prueth_tx_chn *tx_chn = &emac->tx_chns[qid]; if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); return -EINVAL; } + if (!tx_chn->xsk_pool) { + netdev_err(ndev, "XSK pool not registered for queue %d\n", qid); + return -EINVAL; + } + + if (flags & XDP_WAKEUP_TX) { + if (!napi_if_scheduled_mark_missed(&tx_chn->napi_tx)) { + if (likely(napi_schedule_prep(&tx_chn->napi_tx))) + __napi_schedule(&tx_chn->napi_tx); + } + } + return 0; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index a5e3774b03887..67339cdf2ddf4 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -129,6 +129,7 @@ struct prueth_tx_chn { struct hrtimer tx_hrtimer; unsigned long 
tx_pace_timeout_ns; struct xsk_buff_pool *xsk_pool; + bool irq_disabled; }; struct prueth_rx_chn { @@ -150,6 +151,7 @@ enum prueth_swdata_type { PRUETH_SWDATA_PAGE, PRUETH_SWDATA_CMD, PRUETH_SWDATA_XDPF, + PRUETH_SWDATA_XSK, }; struct prueth_swdata { From b2caef8334b328e3033cdb8d9c6df1d42685432a Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 11 Nov 2025 15:45:21 +0530 Subject: [PATCH 842/867] net: ti: icssg-prueth: Make emac_run_xdp function independent of page emac_run_xdp function runs xdp program, at a given hook point in the Rx path of the driver in NAPI context and returns XDP return codes. In zero copy mode the driver receives packets using UMEM frames instead of pages (native XDP). Decouple the usage of page in this function. Reviewed-by: Jacob Keller Signed-off-by: Meghana Malladi Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/icssg/icssg_common.c | 26 ++++++++++++-------- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 3 ++- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 9 +++++-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index d7469ad457fd7..b88cfe99e8b7f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -647,15 +647,15 @@ void emac_rx_timestamp(struct prueth_emac *emac, * emac_xmit_xdp_frame - transmits an XDP frame * @emac: emac device * @xdpf: data to transmit - * @page: page from page pool if already DMA mapped * @q_idx: queue id + * @buff_type: Type of buffer to be transmitted * * Return: XDP state */ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct xdp_frame *xdpf, - struct page *page, - unsigned int q_idx) + unsigned int q_idx, + enum prueth_tx_buff_type buff_type) { struct cppi5_host_desc_t *first_desc; struct net_device *ndev = emac->ndev; @@ -663,6 +663,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct prueth_tx_chn *tx_chn; dma_addr_t 
desc_dma, buf_dma; struct prueth_swdata *swdata; + struct page *page; u32 *epib; int ret; @@ -679,7 +680,12 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, return ICSSG_XDP_CONSUMED; /* drop */ } - if (page) { /* already DMA mapped by page_pool */ + if (buff_type == PRUETH_TX_BUFF_TYPE_XDP_TX) { /* already DMA mapped by page_pool */ + page = virt_to_head_page(xdpf->data); + if (unlikely(!page)) { + netdev_err(ndev, "xdp tx: failed to get page from xdpf\n"); + goto drop_free_descs; + } buf_dma = page_pool_get_dma_addr(page); buf_dma += xdpf->headroom + sizeof(struct xdp_frame); } else { /* Map the linear buffer */ @@ -734,13 +740,11 @@ EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame); * emac_run_xdp - run an XDP program * @emac: emac device * @xdp: XDP buffer containing the frame - * @page: page with RX data if already DMA mapped * @len: Rx descriptor packet length * * Return: XDP state */ -static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, - struct page *page, u32 *len) +static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *len) { struct net_device *ndev = emac->ndev; struct netdev_queue *netif_txq; @@ -767,7 +771,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, q_idx = cpu % emac->tx_ch_num; netif_txq = netdev_get_tx_queue(ndev, q_idx); __netif_tx_lock(netif_txq, cpu); - result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + result = emac_xmit_xdp_frame(emac, xdpf, q_idx, + PRUETH_TX_BUFF_TYPE_XDP_TX); __netif_tx_unlock(netif_txq); if (result == ICSSG_XDP_CONSUMED) { ndev->stats.tx_dropped++; @@ -792,7 +797,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: ndev->stats.rx_dropped++; - page_pool_recycle_direct(emac->rx_chns.pg_pool, page); + page_pool_recycle_direct(emac->rx_chns.pg_pool, + virt_to_head_page(xdp->data)); return ICSSG_XDP_CONSUMED; } } @@ -861,7 +867,7 @@ static int emac_rx_packet(struct 
prueth_emac *emac, u32 flow_id, u32 *xdp_state) xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq); xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); - *xdp_state = emac_run_xdp(emac, &xdp, page, &pkt_len); + *xdp_state = emac_run_xdp(emac, &xdp, &pkt_len); if (*xdp_state != ICSSG_XDP_PASS) goto requeue; headroom = xdp.data - xdp.data_hard_start; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index bdce5a40defef..bb8d42ba0102f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1185,7 +1185,8 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; - err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); + err = emac_xmit_xdp_frame(emac, xdpf, q_idx, + PRUETH_TX_BUFF_TYPE_XDP_NDO); if (err != ICSSG_XDP_TX) { ndev->stats.tx_dropped++; break; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 67339cdf2ddf4..3147a1d8f59a9 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -154,6 +154,11 @@ enum prueth_swdata_type { PRUETH_SWDATA_XSK, }; +enum prueth_tx_buff_type { + PRUETH_TX_BUFF_TYPE_XDP_TX, + PRUETH_TX_BUFF_TYPE_XDP_NDO, +}; + struct prueth_swdata { enum prueth_swdata_type type; union prueth_data { @@ -506,8 +511,8 @@ void prueth_put_cores(struct prueth *prueth, int slice); u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns); u32 emac_xmit_xdp_frame(struct prueth_emac *emac, struct xdp_frame *xdpf, - struct page *page, - unsigned int q_idx); + unsigned int q_idx, + enum prueth_tx_buff_type buff_type); void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); void prueth_tx_cleanup(void *data, dma_addr_t desc_dma); int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags); From 
f75c887a0852c2b5cce97c2c0539ecd0a3dfe085 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 11 Nov 2025 15:45:22 +0530 Subject: [PATCH 843/867] net: ti: icssg-prueth: Add AF_XDP zero copy for RX Use xsk_pool inside rx_chn to check if a given Rx queue id is registered for xsk zero copy, which gets populated during xsk enable. Update prueth_create_xdp_rxqs to register and support two different memory models (xsk and page) for a given Rx queue, if registered for zero copy. If xsk_pool is registered, allocate buffers from UMEM and map them to the hardware Rx descriptors. In NAPI context, run the XDP program for each packet and process the xsk buffer according to the XDP result codes. Also allocate a new set of buffers from UMEM for the next batch of NAPI Rx processing. Add XDP_WAKEUP_RX support to support xsk wakeup for Rx. Move prueth_create_page_pool to prueth_init_rx_chns to avoid freeing and re-allocating the system memory every time there is a transition from zero copy to copy and to prevent any type of memory fragmentation or leak. 
Reviewed-by: Jacob Keller Signed-off-by: Meghana Malladi Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/icssg/icssg_common.c | 325 +++++++++++++++---- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 60 +++- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 315 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b88cfe99e8b7f..074afe1733086 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -465,6 +465,29 @@ int prueth_init_tx_chns(struct prueth_emac *emac) } EXPORT_SYMBOL_GPL(prueth_init_tx_chns); +static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, + struct device *dma_dev, + int size) +{ + struct page_pool_params pp_params = { 0 }; + struct page_pool *pool; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = size; + pp_params.nid = dev_to_node(emac->prueth->dev); + pp_params.dma_dir = DMA_BIDIRECTIONAL; + pp_params.dev = dma_dev; + pp_params.napi = &emac->napi_rx; + pp_params.max_len = PAGE_SIZE; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + netdev_err(emac->ndev, "cannot create rx page pool\n"); + + return pool; +} + int prueth_init_rx_chns(struct prueth_emac *emac, struct prueth_rx_chn *rx_chn, char *name, u32 max_rflows, @@ -474,6 +497,7 @@ int prueth_init_rx_chns(struct prueth_emac *emac, struct device *dev = emac->prueth->dev; struct net_device *ndev = emac->ndev; u32 fdqring_id, hdesc_size; + struct page_pool *pool; int i, ret = 0, slice; int flow_id_base; @@ -516,6 +540,14 @@ int prueth_init_rx_chns(struct prueth_emac *emac, goto fail; } + pool = prueth_create_page_pool(emac, rx_chn->dma_dev, rx_chn->descs_num); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + goto fail; + } + + rx_chn->pg_pool = pool; + flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); if (emac->is_sr1 && 
!strcmp(name, "rxmgm")) { emac->rx_mgm_flow_id_base = flow_id_base; @@ -797,12 +829,190 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *len fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: ndev->stats.rx_dropped++; - page_pool_recycle_direct(emac->rx_chns.pg_pool, - virt_to_head_page(xdp->data)); return ICSSG_XDP_CONSUMED; } } +static int prueth_dma_rx_push_mapped_zc(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + struct xdp_buff *xdp) +{ + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + struct prueth_swdata *swdata; + dma_addr_t desc_dma; + dma_addr_t buf_dma; + int buf_len; + + buf_dma = xsk_buff_xdp_get_dma(xdp); + desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); + if (!desc_rx) { + netdev_err(ndev, "rx push: failed to allocate descriptor\n"); + return -ENOMEM; + } + desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); + + cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); + buf_len = xsk_pool_get_rx_frame_size(rx_chn->xsk_pool); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, buf_len, buf_dma, buf_len); + swdata = cppi5_hdesc_get_swdata(desc_rx); + swdata->type = PRUETH_SWDATA_XSK; + swdata->data.xdp = xdp; + + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, PRUETH_RX_FLOW_DATA, + desc_rx, desc_dma); + + return 0; +} + +static int prueth_rx_alloc_zc(struct prueth_emac *emac, int budget) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + struct xdp_buff *xdp; + int i, ret; + + for (i = 0; i < budget; i++) { + xdp = xsk_buff_alloc(rx_chn->xsk_pool); + if (!xdp) + break; + + ret = prueth_dma_rx_push_mapped_zc(emac, rx_chn, xdp); + if (ret) { + netdev_err(emac->ndev, "rx alloc: failed to map descriptors to xdp buff\n"); + xsk_buff_free(xdp); + break; + } + } + + return i; +} + +static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, u32 
*psdata) +{ + unsigned int headroom = xdp->data - xdp->data_hard_start; + unsigned int pkt_len = xdp->data_end - xdp->data; + struct net_device *ndev = emac->ndev; + struct sk_buff *skb; + + skb = napi_alloc_skb(&emac->napi_rx, xdp->data_end - xdp->data_hard_start); + if (unlikely(!skb)) { + ndev->stats.rx_dropped++; + return; + } + + skb_reserve(skb, headroom); + skb_put(skb, pkt_len); + skb->dev = ndev; + + /* RX HW timestamp */ + if (emac->rx_ts_enabled) + emac_rx_timestamp(emac, skb, psdata); + + if (emac->prueth->is_switch_mode) + skb->offload_fwd_mark = emac->offload_fwd_mark; + skb->protocol = eth_type_trans(skb, ndev); + + skb_mark_for_recycle(skb); + napi_gro_receive(&emac->napi_rx, skb); + ndev->stats.rx_bytes += pkt_len; + ndev->stats.rx_packets++; +} + +static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, + int budget) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + u32 buf_dma_len, pkt_len, port_id = 0; + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + struct prueth_swdata *swdata; + dma_addr_t desc_dma, buf_dma; + struct xdp_buff *xdp; + int xdp_status = 0; + int count = 0; + u32 *psdata; + int ret; + + while (count < budget) { + ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); + if (ret) { + if (ret != -ENODATA) + netdev_err(ndev, "rx pop: failed: %d\n", ret); + break; + } + + if (cppi5_desc_is_tdcm(desc_dma)) { + complete(&emac->tdown_complete); + break; + } + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_rx); + if (swdata->type != PRUETH_SWDATA_XSK) { + netdev_err(ndev, "rx_pkt: invalid swdata->type %d\n", swdata->type); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + break; + } + + xdp = swdata->data.xdp; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + pkt_len = cppi5_hdesc_get_pktlen(desc_rx); + /* firmware adds 4 CRC bytes, strip them */ 
+ pkt_len -= 4; + cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); + psdata = cppi5_hdesc_get_psdata(desc_rx); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + count++; + xsk_buff_set_size(xdp, pkt_len); + xsk_buff_dma_sync_for_cpu(xdp); + + if (prueth_xdp_is_enabled(emac)) { + ret = emac_run_xdp(emac, xdp, &pkt_len); + switch (ret) { + case ICSSG_XDP_PASS: + /* prepare skb and send to n/w stack */ + emac_dispatch_skb_zc(emac, xdp, psdata); + xsk_buff_free(xdp); + break; + case ICSSG_XDP_CONSUMED: + xsk_buff_free(xdp); + break; + case ICSSG_XDP_TX: + case ICSSG_XDP_REDIR: + xdp_status |= ret; + break; + } + } else { + /* prepare skb and send to n/w stack */ + emac_dispatch_skb_zc(emac, xdp, psdata); + xsk_buff_free(xdp); + } + } + + if (xdp_status & ICSSG_XDP_REDIR) + xdp_do_flush(); + + /* Allocate xsk buffers from the pool for the "count" number of + * packets processed in order to be able to receive more packets. + */ + ret = prueth_rx_alloc_zc(emac, count); + + if (xsk_uses_need_wakeup(rx_chn->xsk_pool)) { + /* If the user space doesn't provide enough buffers then it must + * explicitly wake up the kernel when new buffers are available + */ + if (ret < count) + xsk_set_rx_need_wakeup(rx_chn->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx_chn->xsk_pool); + } + + return count; +} + static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) { struct prueth_rx_chn *rx_chn = &emac->rx_chns; @@ -849,7 +1059,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) /* firmware adds 4 CRC bytes, strip them */ pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* if allocation fails we drop the packet but push the @@ -921,12 +1130,16 @@ void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) struct cppi5_host_desc_t *desc_rx; struct prueth_swdata *swdata; struct page_pool *pool; + struct xdp_buff *xdp; struct page *page; pool = 
rx_chn->pg_pool; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - if (swdata->type == PRUETH_SWDATA_PAGE) { + if (rx_chn->xsk_pool) { + xdp = swdata->data.xdp; + xsk_buff_free(xdp); + } else { page = swdata->data.page; page_pool_recycle_direct(pool, page); } @@ -1174,6 +1387,7 @@ irqreturn_t prueth_rx_irq(int irq, void *dev_id) { struct prueth_emac *emac = dev_id; + emac->rx_chns.irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&emac->napi_rx); @@ -1201,6 +1415,7 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA; int flow = emac->is_sr1 ? PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; int xdp_state_or = 0; int num_rx = 0; int cur_budget; @@ -1208,14 +1423,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) int ret; while (flow--) { - cur_budget = budget - num_rx; - - while (cur_budget--) { - ret = emac_rx_packet(emac, flow, &xdp_state); - xdp_state_or |= xdp_state; - if (ret) - break; - num_rx++; + if (rx_chn->xsk_pool) { + num_rx = emac_rx_packet_zc(emac, flow, budget); + } else { + cur_budget = budget - num_rx; + + while (cur_budget--) { + ret = emac_rx_packet(emac, flow, &xdp_state); + xdp_state_or |= xdp_state; + if (ret) + break; + num_rx++; + } } if (num_rx >= budget) @@ -1231,7 +1450,11 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) ns_to_ktime(emac->rx_pace_timeout_ns), HRTIMER_MODE_REL_PINNED); } else { - enable_irq(emac->rx_chns.irq[rx_flow]); + if (emac->rx_chns.irq_disabled) { + /* re-enable the RX IRQ */ + emac->rx_chns.irq_disabled = false; + enable_irq(emac->rx_chns.irq[rx_flow]); + } } } @@ -1239,62 +1462,48 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget) } EXPORT_SYMBOL_GPL(icssg_napi_rx_poll); -static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, - struct device *dma_dev, - int size) -{ - 
struct page_pool_params pp_params = { 0 }; - struct page_pool *pool; - - pp_params.order = 0; - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; - pp_params.pool_size = size; - pp_params.nid = dev_to_node(emac->prueth->dev); - pp_params.dma_dir = DMA_BIDIRECTIONAL; - pp_params.dev = dma_dev; - pp_params.napi = &emac->napi_rx; - pp_params.max_len = PAGE_SIZE; - - pool = page_pool_create(&pp_params); - if (IS_ERR(pool)) - netdev_err(emac->ndev, "cannot create rx page pool\n"); - - return pool; -} - int prueth_prepare_rx_chan(struct prueth_emac *emac, struct prueth_rx_chn *chn, int buf_size) { - struct page_pool *pool; struct page *page; + int desc_avail; int i, ret; - pool = prueth_create_page_pool(emac, chn->dma_dev, chn->descs_num); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - chn->pg_pool = pool; + desc_avail = k3_cppi_desc_pool_avail(chn->desc_pool); + if (desc_avail < chn->descs_num) + netdev_warn(emac->ndev, + "not enough RX descriptors available %d < %d\n", + desc_avail, chn->descs_num); - for (i = 0; i < chn->descs_num; i++) { - /* NOTE: we're not using memory efficiently here. - * 1 full page (4KB?) used here instead of - * PRUETH_MAX_PKT_SIZE (~1.5KB?) + if (chn->xsk_pool) { + /* get pages from xsk_pool and push to RX ring + * queue as much as possible */ - page = page_pool_dev_alloc_pages(pool); - if (!page) { - netdev_err(emac->ndev, "couldn't allocate rx page\n"); - ret = -ENOMEM; + ret = prueth_rx_alloc_zc(emac, desc_avail); + if (!ret) goto recycle_alloc_pg; - } + } else { + for (i = 0; i < desc_avail; i++) { + /* NOTE: we're not using memory efficiently here. + * 1 full page (4KB?) used here instead of + * PRUETH_MAX_PKT_SIZE (~1.5KB?) 
+ */ + page = page_pool_dev_alloc_pages(chn->pg_pool); + if (!page) { + netdev_err(emac->ndev, "couldn't allocate rx page\n"); + ret = -ENOMEM; + goto recycle_alloc_pg; + } - ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); - if (ret < 0) { - netdev_err(emac->ndev, - "cannot submit page for rx chan %s ret %d\n", - chn->name, ret); - page_pool_recycle_direct(pool, page); - goto recycle_alloc_pg; + ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); + if (ret < 0) { + netdev_err(emac->ndev, + "cannot submit page for rx chan %s ret %d\n", + chn->name, ret); + page_pool_recycle_direct(chn->pg_pool, page); + goto recycle_alloc_pg; + } } } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index bb8d42ba0102f..22de04ac18cb4 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -395,7 +395,11 @@ static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer) container_of(timer, struct prueth_emac, rx_hrtimer); int rx_flow = PRUETH_RX_FLOW_DATA; - enable_irq(emac->rx_chns.irq[rx_flow]); + if (emac->rx_chns.irq_disabled) { + /* re-enable the RX IRQ */ + emac->rx_chns.irq_disabled = false; + enable_irq(emac->rx_chns.irq[rx_flow]); + } return HRTIMER_NORESTART; } @@ -569,31 +573,41 @@ const struct icss_iep_clockops prueth_iep_clockops = { .perout_enable = prueth_perout_enable, }; +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) +{ + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; + + if (xdp_rxq_info_is_reg(rxq)) + xdp_rxq_info_unreg(rxq); +} + static int prueth_create_xdp_rxqs(struct prueth_emac *emac) { struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; struct page_pool *pool = emac->rx_chns.pg_pool; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; int ret; ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id); if (ret) return ret; - ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); - if 
(ret) - xdp_rxq_info_unreg(rxq); - - return ret; -} - -static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) -{ - struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; + if (rx_chn->xsk_pool) { + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); + if (ret) + goto xdp_unreg; + xsk_pool_set_rxq_info(rx_chn->xsk_pool, rxq); + } else { + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + goto xdp_unreg; + } - if (!xdp_rxq_info_is_reg(rxq)) - return; + return 0; - xdp_rxq_info_unreg(rxq); +xdp_unreg: + prueth_destroy_xdp_rxqs(emac); + return ret; } static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr) @@ -1349,6 +1363,12 @@ int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) { struct prueth_emac *emac = netdev_priv(ndev); struct prueth_tx_chn *tx_chn = &emac->tx_chns[qid]; + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + + if (emac->xsk_qid != qid) { + netdev_err(ndev, "XSK queue %d not registered\n", qid); + return -EINVAL; + } if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); @@ -1360,6 +1380,11 @@ int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) return -EINVAL; } + if (!rx_chn->xsk_pool) { + netdev_err(ndev, "XSK pool not registered for RX queue %d\n", qid); + return -EINVAL; + } + if (flags & XDP_WAKEUP_TX) { if (!napi_if_scheduled_mark_missed(&tx_chn->napi_tx)) { if (likely(napi_schedule_prep(&tx_chn->napi_tx))) @@ -1367,6 +1392,13 @@ int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) } } + if (flags & XDP_WAKEUP_RX) { + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) { + if (likely(napi_schedule_prep(&emac->napi_rx))) + __napi_schedule(&emac->napi_rx); + } + } + return 0; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 3147a1d8f59a9..10eadd3566504 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ 
b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -143,6 +143,7 @@ struct prueth_rx_chn { struct page_pool *pg_pool; struct xdp_rxq_info xdp_rxq; struct xsk_buff_pool *xsk_pool; + bool irq_disabled; }; enum prueth_swdata_type { @@ -166,6 +167,7 @@ struct prueth_swdata { struct page *page; u32 cmd; struct xdp_frame *xdpf; + struct xdp_buff *xdp; } data; }; From 7ffec9c92a4b791e31f54a5a25885c9be103d33a Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 11 Nov 2025 15:45:23 +0530 Subject: [PATCH 844/867] net: ti: icssg-prueth: Enable zero copy in XDP features Enable the zero copy feature flag in xdp_set_features_flag() for a given ndev to get the AF-XDP zero copy support running for both Tx and Rx. Reviewed-by: Jacob Keller Signed-off-by: Meghana Malladi Signed-off-by: NipaLocal --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 22de04ac18cb4..f65041662173c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1554,7 +1554,8 @@ static int prueth_netdev_init(struct prueth *prueth, xdp_set_features_flag(ndev, NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT); + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_XSK_ZEROCOPY); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, From 9dd6e7bb29575f3126239e82f6d09beaadd2b1a8 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 11 Nov 2025 19:12:11 +0800 Subject: [PATCH 845/867] virtio-net: correct hdr_len handling for VIRTIO_NET_F_GUEST_HDRLEN The commit be50da3e9d4a ("net: virtio_net: implement exact header length guest feature") introduces support for the VIRTIO_NET_F_GUEST_HDRLEN feature in virtio-net. 
This feature requires virtio-net to set hdr_len to the actual header length of the packet when transmitting, the number of bytes from the start of the packet to the beginning of the transport-layer payload. However, in practice, hdr_len was being set using skb_headlen(skb), which is clearly incorrect. This commit fixes that issue. Fixes: be50da3e9d4a ("net: virtio_net: implement exact header length guest feature") Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- arch/um/drivers/vector_transports.c | 1 + drivers/net/tun_vnet.h | 4 ++-- drivers/net/virtio_net.c | 9 +++++++-- include/linux/virtio_net.h | 26 +++++++++++++++++++++----- net/packet/af_packet.c | 5 +++-- 5 files changed, 34 insertions(+), 11 deletions(-) diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c index 0794d23f07cbc..03c5baa1d0c1b 100644 --- a/arch/um/drivers/vector_transports.c +++ b/arch/um/drivers/vector_transports.c @@ -121,6 +121,7 @@ static int raw_form_header(uint8_t *header, vheader, virtio_legacy_is_little_endian(), false, + false, 0 ); diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index 81662328b2c79..0d376bc70dd73 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h @@ -214,7 +214,7 @@ static inline int tun_vnet_hdr_from_skb(unsigned int flags, if (virtio_net_hdr_from_skb(skb, hdr, tun_vnet_is_little_endian(flags), true, - vlan_hlen)) { + false, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb); if (net_ratelimit()) { @@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags, if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, tun_vnet_is_little_endian(flags), - vlan_hlen)) { + false, vlan_hlen)) { struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; struct skb_shared_info *sinfo = skb_shinfo(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index cfa006b886887..3fe5dcfc57ed4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3317,9 +3317,13 @@ 
static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; struct virtio_net_hdr_v1_hash_tunnel *hdr; - int num_sg; unsigned hdr_len = vi->hdr_len; + bool hdrlen_negotiated; bool can_push; + int num_sg; + + hdrlen_negotiated = virtio_has_feature(vi->vdev, + VIRTIO_NET_F_GUEST_HDRLEN); pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); @@ -3339,7 +3343,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, - virtio_is_little_endian(vi->vdev), 0)) + virtio_is_little_endian(vi->vdev), + hdrlen_negotiated, 0)) return -EPROTO; if (vi->mergeable_rx_bufs) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index b673c31569f32..3cd8b2ebc1977 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -211,16 +211,15 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, bool has_data_valid, + bool hdrlen_negotiated, int vlan_hlen) { memset(hdr, 0, sizeof(*hdr)); /* no info leak */ if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); + u16 hdr_len; - /* This is a hint as to how much should be linear. */ - hdr->hdr_len = __cpu_to_virtio16(little_endian, - skb_headlen(skb)); hdr->gso_size = __cpu_to_virtio16(little_endian, sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) @@ -231,6 +230,21 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; else return -EINVAL; + + if (hdrlen_negotiated) { + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + } else { + /* This is a hint as to how much should be linear. 
*/ + hdr_len = skb_headlen(skb); + } + + hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len); + if (sinfo->gso_type & SKB_GSO_TCP_ECN) hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else @@ -384,6 +398,7 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, struct virtio_net_hdr_v1_hash_tunnel *vhdr, bool tnl_hdr_negotiated, bool little_endian, + bool hdrlen_negotiated, int vlan_hlen) { struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; @@ -395,7 +410,7 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM); if (!tnl_gso_type) return virtio_net_hdr_from_skb(skb, hdr, little_endian, false, - vlan_hlen); + hdrlen_negotiated, vlan_hlen); /* Tunnel support not negotiated but skb ask for it. */ if (!tnl_hdr_negotiated) @@ -408,7 +423,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, /* Let the basic parsing deal with plain GSO features. */ skb_shinfo(skb)->gso_type &= ~tnl_gso_type; - ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); + ret = virtio_net_hdr_from_skb(skb, hdr, true, false, hdrlen_negotiated, + vlan_hlen); skb_shinfo(skb)->gso_type |= tnl_gso_type; if (ret) return ret; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 494d628d10a51..2d370b92db709 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2093,7 +2093,8 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, return -EINVAL; *len -= vnet_hdr_sz; - if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0)) + if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, + vio_le(), true, false, 0)) return -EINVAL; return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz); @@ -2361,7 +2362,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (vnet_hdr_sz && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) { + vio_le(), true, false, 0)) { if (po->tp_version == TPACKET_V3) 
prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; From 7b09c2a824dc135492bd28d988f932f8a2692439 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 11 Nov 2025 19:12:12 +0800 Subject: [PATCH 846/867] virtio-net: correct hdr_len handling for tunnel gso The commit a2fb4bc4e2a6a03 ("net: implement virtio helpers to handle UDP GSO tunneling.") introduces support for the UDP GSO tunnel feature in virtio-net. The virtio spec says: If the \field{gso_type} has the VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 bit or VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6 bit set, \field{hdr_len} accounts for all the headers up to and including the inner transport. The commit did not update the hdr_len to include the inner transport. I observed that the "hdr_len" is 116 for this packet: 17:36:18.241105 52:55:00:d1:27:0a > 2e:2c:df:46:a9:e1, ethertype IPv4 (0x0800), length 2912: (tos 0x0, ttl 64, id 45197, offset 0, flags [none], proto UDP (17), length 2898) 192.168.122.100.50613 > 192.168.122.1.4789: [bad udp cksum 0x8106 -> 0x26a0!] 
VXLAN, flags [I] (0x08), vni 1 fa:c3:ba:82:05:ee > ce:85:0c:31:77:e5, ethertype IPv4 (0x0800), length 2862: (tos 0x0, ttl 64, id 14678, offset 0, flags [DF], proto TCP (6), length 2848) 192.168.3.1.49880 > 192.168.3.2.9898: Flags [P.], cksum 0x9266 (incorrect -> 0xaa20), seq 515667:518463, ack 1, win 64, options [nop,nop,TS val 2990048824 ecr 2798801412], length 2796 116 = 14(mac) + 20(ip) + 8(udp) + 8(vxlan) + 14(inner mac) + 20(inner ip) + 32(innner tcp) Fixes: a2fb4bc4e2a6a03 ("net: implement virtio helpers to handle UDP GSO tunneling.") Signed-off-by: Xuan Zhuo Signed-off-by: NipaLocal --- include/linux/virtio_net.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 3cd8b2ebc1977..432b17979d174 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -232,12 +232,23 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, return -EINVAL; if (hdrlen_negotiated) { - hdr_len = skb_transport_offset(skb); + if (sinfo->gso_type & (SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)) { + hdr_len = skb_inner_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += inner_tcp_hdrlen(skb); + } else { + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + } - if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) - hdr_len += sizeof(struct udphdr); - else - hdr_len += tcp_hdrlen(skb); } else { /* This is a hint as to how much should be linear. */ hdr_len = skb_headlen(skb); @@ -421,11 +432,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, vhdr->hash_hdr.hash_report = 0; vhdr->hash_hdr.padding = 0; - /* Let the basic parsing deal with plain GSO features. 
*/ - skb_shinfo(skb)->gso_type &= ~tnl_gso_type; ret = virtio_net_hdr_from_skb(skb, hdr, true, false, hdrlen_negotiated, vlan_hlen); - skb_shinfo(skb)->gso_type |= tnl_gso_type; if (ret) return ret; From 8b0109a70050f454622d48915863e20053bec5b4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 11:26:34 +0000 Subject: [PATCH 847/867] net: stmmac: improve ndev->max_mtu setup readability Improve the readability of the code setting ndev->max_mtu. This depends on the hardware specific maximum defined by the MAC core, and also a platform provided maximum. The code was originally checking that the platform specific maximum was between ndev->min_mtu..MAC core maximum before reducing ndev->max_mtu, otherwise if the platform specific maximum was less than ndev->min_mtu, issuing a warning. Re-order the code to handle the case where the platform specific max is below ndev->min_mtu, which then means that the subsequent test is simply reducing ndev->max_mtu. Update the comment, and add a few blank lines to separate the blocks of code.
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ccf383b355e75..eb43501939962 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1392,9 +1392,9 @@ static unsigned int stmmac_rx_offset(struct stmmac_priv *priv) return NET_SKB_PAD; } -static int stmmac_set_bfsize(int mtu, int bufsize) +static int stmmac_set_bfsize(int mtu) { - int ret = bufsize; + int ret; if (mtu >= BUF_SIZE_8KiB) ret = BUF_SIZE_16KiB; @@ -3958,12 +3958,13 @@ stmmac_setup_dma_desc(struct stmmac_priv *priv, unsigned int mtu) return ERR_PTR(-ENOMEM); } + /* Returns 0 or BUF_SIZE_16KiB if mtu > 8KiB and dwmac4 or ring mode */ bfsize = stmmac_set_16kib_bfsize(priv, mtu); if (bfsize < 0) bfsize = 0; if (bfsize < BUF_SIZE_16KiB) - bfsize = stmmac_set_bfsize(mtu, 0); + bfsize = stmmac_set_bfsize(mtu); dma_conf->dma_buf_sz = bfsize; /* Chose the tx/rx size from the already defined one in the @@ -7773,22 +7774,23 @@ int stmmac_dvr_probe(struct device *device, /* MTU range: 46 - hw-specific max */ ndev->min_mtu = ETH_ZLEN - ETH_HLEN; + if (priv->plat->core_type == DWMAC_CORE_XGMAC) ndev->max_mtu = XGMAC_JUMBO_LEN; - else if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00)) + else if (priv->plat->enh_desc || priv->synopsys_id >= DWMAC_CORE_4_00) ndev->max_mtu = JUMBO_LEN; else ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN); - /* Will not overwrite ndev->max_mtu if plat->maxmtu > ndev->max_mtu - * as well as plat->maxmtu < ndev->min_mtu which is a invalid range. + + /* Warn if the platform's maxmtu is smaller than the minimum MTU, + * otherwise clamp the maximum MTU above to the platform's maxmtu. 
*/ - if ((priv->plat->maxmtu < ndev->max_mtu) && - (priv->plat->maxmtu >= ndev->min_mtu)) - ndev->max_mtu = priv->plat->maxmtu; - else if (priv->plat->maxmtu < ndev->min_mtu) + if (priv->plat->maxmtu < ndev->min_mtu) dev_warn(priv->device, "%s: warning: maxmtu having invalid value (%d)\n", __func__, priv->plat->maxmtu); + else if (priv->plat->maxmtu < ndev->max_mtu) + ndev->max_mtu = priv->plat->maxmtu; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; From 05334798f938f480187c30fbb66aac8630ebbbfa Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 11:26:39 +0000 Subject: [PATCH 848/867] net: stmmac: clean up stmmac_reset() stmmac_reset() takes the stmmac_priv and an ioaddr. It has one call site, which passes the priv pointer, and dereferences priv for the ioaddr. stmmac_reset() then checks whether priv is NULL. If it was, the caller would have oopsed. Remove the checks for NULL, and move the dereference for ioaddr into stmmac_reset(). Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 8 +++----- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 8212441f9826e..ee612cadbd77f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -103,12 +103,10 @@ static int stmmac_dwxlgmac_quirks(struct stmmac_priv *priv) return 0; } -int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr) +int stmmac_reset(struct stmmac_priv *priv) { - struct plat_stmmacenet_data *plat = priv ? 
priv->plat : NULL; - - if (!priv) - return -EINVAL; + struct plat_stmmacenet_data *plat = priv->plat; + void __iomem *ioaddr = priv->ioaddr; if (plat && plat->fix_soc_reset) return plat->fix_soc_reset(priv, ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index cb8fc09caf86b..d359722100fa3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -698,7 +698,7 @@ extern const struct stmmac_tc_ops dwmac510_tc_ops; #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ -int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr); +int stmmac_reset(struct stmmac_priv *priv); int stmmac_hwif_init(struct stmmac_priv *priv); #endif /* __STMMAC_HWIF_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index eb43501939962..d202f604161ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3162,7 +3162,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) if (ret) return ret; - ret = stmmac_reset(priv, priv->ioaddr); + ret = stmmac_reset(priv); if (ret) { netdev_err(priv->dev, "Failed to reset the dma\n"); return ret; From f61d023461121f8d3b9e5c5fd9b5f613a498aa0c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 11:38:38 +0000 Subject: [PATCH 849/867] net: phy: allow drivers to disable EEE support via .get_features() Allow PHY drivers to hook the .get_features() method to disable EEE support. This is useful for TI PHYs, where we have a statement that none of their gigabit products support EEE, yet at least DP83867 reports EEE capabilities and implements EEE negotiation.
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/phy/phy-core.c | 2 -- drivers/net/phy/phy_device.c | 34 ++++++++++++++++++++++++++++++---- include/linux/phy.h | 1 + 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 605ca20ae192d..43ccbd3a09f8c 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -207,8 +207,6 @@ void of_set_phy_eee_broken(struct phy_device *phydev) if (!IS_ENABLED(CONFIG_OF_MDIO) || !node) return; - linkmode_zero(modes); - if (of_property_read_bool(node, "eee-broken-100tx")) linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, modes); if (of_property_read_bool(node, "eee-broken-1000t")) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 81984d4ebb7cb..437b89a0e944b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3397,6 +3397,34 @@ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(fwnode_get_phy_node); +static int phy_get_features(struct phy_device *phydev) +{ + int err; + + if (phydev->is_c45) + err = genphy_c45_pma_read_abilities(phydev); + else + err = genphy_read_abilities(phydev); + + return err; +} + +/** + * phy_get_features_no_eee - read the PHY features, disabling all EEE + * @phydev: phy_device structure to be added to the MDIO bus + * + * Read the PHY features, and fill the @phydev->eee_disabled_modes to + * prevent EEE being used. This is intended to be used for PHY .get_features + * methods where a PHY reports incorrect capabilities.
+ */ +int phy_get_features_no_eee(struct phy_device *phydev) +{ + linkmode_fill(phydev->eee_disabled_modes); + + return phy_get_features(phydev); +} +EXPORT_SYMBOL_GPL(phy_get_features_no_eee); + /** * phy_probe - probe and init a PHY device * @dev: device to probe and init @@ -3442,10 +3470,8 @@ static int phy_probe(struct device *dev) } else if (phydrv->get_features) err = phydrv->get_features(phydev); - else if (phydev->is_c45) - err = genphy_c45_pma_read_abilities(phydev); - else - err = genphy_read_abilities(phydev); + else + err = phy_get_features(phydev); if (err) goto out; diff --git a/include/linux/phy.h b/include/linux/phy.h index bf5457341ca80..2655c0ae64883 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2299,6 +2299,7 @@ void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); void phy_support_eee(struct phy_device *phydev); void phy_disable_eee(struct phy_device *phydev); +int phy_get_features_no_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); From 99d40aec7d56f96c6615bad31bf4bc56e921a2bf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 11 Nov 2025 11:38:43 +0000 Subject: [PATCH 850/867] net: phy: TI PHYs use phy_get_features_no_eee() As TI Gigabit PHYs do not support EEE, use the newly introduced phy_get_features_no_eee() to read the features but mark EEE as disabled. 
Signed-off-by: Russell King (Oracle) Signed-off-by: NipaLocal --- drivers/net/phy/dp83822.c | 3 +++ drivers/net/phy/dp83867.c | 1 + drivers/net/phy/dp83869.c | 1 + drivers/net/phy/dp83tc811.c | 1 + 4 files changed, 6 insertions(+) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 33db21251f2ef..20caf9a5faa72 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -1160,6 +1160,7 @@ static int dp83822_led_hw_control_get(struct phy_device *phydev, u8 index, .name = (_name), \ /* PHY_BASIC_FEATURES */ \ .probe = dp83822_probe, \ + .get_features = phy_get_features_no_eee, \ .soft_reset = dp83822_phy_reset, \ .config_init = dp83822_config_init, \ .read_status = dp83822_read_status, \ @@ -1180,6 +1181,7 @@ static int dp83822_led_hw_control_get(struct phy_device *phydev, u8 index, .name = (_name), \ /* PHY_BASIC_FEATURES */ \ .probe = dp8382x_probe, \ + .get_features = phy_get_features_no_eee, \ .soft_reset = dp83822_phy_reset, \ .config_init = dp83825_config_init, \ .get_wol = dp83822_get_wol, \ @@ -1196,6 +1198,7 @@ static int dp83822_led_hw_control_get(struct phy_device *phydev, u8 index, .name = (_name), \ /* PHY_BASIC_FEATURES */ \ .probe = dp83826_probe, \ + .get_features = phy_get_features_no_eee, \ .soft_reset = dp83822_phy_reset, \ .config_init = dp83826_config_init, \ .get_wol = dp83822_get_wol, \ diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 36a0c1b7f59c7..da055ff861beb 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -1124,6 +1124,7 @@ static struct phy_driver dp83867_driver[] = { /* PHY_GBIT_FEATURES */ .probe = dp83867_probe, + .get_features = phy_get_features_no_eee, .config_init = dp83867_config_init, .soft_reset = dp83867_phy_reset, diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index 1f381d7b13ff3..4400654b0f726 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -906,6 +906,7 @@ static int dp83869_phy_reset(struct 
phy_device *phydev) PHY_ID_MATCH_MODEL(_id), \ .name = (_name), \ .probe = dp83869_probe, \ + .get_features = phy_get_features_no_eee, \ .config_init = dp83869_config_init, \ .soft_reset = dp83869_phy_reset, \ .config_intr = dp83869_config_intr, \ diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index e480c2a074505..92c5f3cfee9e2 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -390,6 +390,7 @@ static struct phy_driver dp83811_driver[] = { .phy_id_mask = 0xfffffff0, .name = "TI DP83TC811", /* PHY_BASIC_FEATURES */ + .get_features = phy_get_features_no_eee, .config_init = dp83811_config_init, .config_aneg = dp83811_config_aneg, .soft_reset = dp83811_phy_reset, From 7473540b4a25a0121c6892e9f8462b0515cf8528 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 Nov 2025 14:14:39 +0200 Subject: [PATCH 851/867] devlink: rate: Unset parent pointer in devl_rate_nodes_destroy The function devl_rate_nodes_destroy is documented to "Unset parent for all rate objects". However, it was only calling the driver-specific `rate_leaf_parent_set` or `rate_node_parent_set` ops and decrementing the parent's refcount, without actually setting the `devlink_rate->parent` pointer to NULL. This leaves a dangling pointer in the `devlink_rate` struct, which is inconsistent with the behavior of `devlink_nl_rate_parent_node_set`, where the parent pointer is correctly cleared. This patch fixes the issue by explicitly setting `devlink_rate->parent` to NULL after notifying the driver, thus fulfilling the function's documented behavior for all rate objects. 
Fixes: d75559845078 ("devlink: Allow setting parent node of rate objects") Signed-off-by: Shay Drory Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Signed-off-by: NipaLocal --- net/devlink/rate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 264fb82cba196..d157a8419bcad 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -828,13 +828,15 @@ void devl_rate_nodes_destroy(struct devlink *devlink) if (!devlink_rate->parent) continue; - refcount_dec(&devlink_rate->parent->refcnt); if (devlink_rate_is_leaf(devlink_rate)) ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, NULL); else if (devlink_rate_is_node(devlink_rate)) ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, NULL); + + refcount_dec(&devlink_rate->parent->refcnt); + devlink_rate->parent = NULL; } list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { if (devlink_rate_is_node(devlink_rate)) { From 2e41195e31d58e64f75ba542dbc95665b5017f41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2025 15:12:35 +0000 Subject: [PATCH 852/867] net: clear skb->sk in skb_release_head_state() skb_release_head_state() inlines skb_orphan(). We need to clear skb->sk otherwise we can freeze TCP flows on a mostly idle host, because skb_fclone_busy() would return true as long as the packet is not yet processed by skb_defer_free_flush(). 
Fixes: 1fcf572211da ("net: allow skb_release_head_state() to be called multiple times") Fixes: e20dfbad8aab ("net: fix napi_consume_skb() with alien skbs") Signed-off-by: Eric Dumazet Signed-off-by: NipaLocal --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4f4d7ab7057f1..f34372666e67c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1152,6 +1152,7 @@ void skb_release_head_state(struct sk_buff *skb) #endif skb->destructor = NULL; + skb->sk = NULL; } nf_reset_ct(skb); skb_ext_reset(skb); From a4d4035cabb756acaefad389da075851a3e8fafd Mon Sep 17 00:00:00 2001 From: Felix Maurer Date: Tue, 11 Nov 2025 17:29:32 +0100 Subject: [PATCH 853/867] hsr: Fix supervision frame sending on HSRv0 On HSRv0, no supervision frames were sent. The supervision frames were generated successfully, but failed the check for a sufficiently long mac header, i.e., at least sizeof(struct hsr_ethhdr), in hsr_fill_frame_info() because the mac header only contained the ethernet header. Fix this by including the HSR header in the mac header when generating HSR supervision frames. Note that the mac header now also includes the TLV fields. This matches how we set the headers on rx and also the size of struct hsrv0_ethhdr_sp. Reported-by: Hangbin Liu Closes: https://lore.kernel.org/netdev/aMONxDXkzBZZRfE5@fedora/ Fixes: 9cfb5e7f0ded ("net: hsr: fix hsr_init_sk() vs network/transport headers.") Signed-off-by: Felix Maurer Signed-off-by: NipaLocal --- net/hsr/hsr_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index fbbc3ccf9df64..1235abb2d79fa 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -320,6 +320,9 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + skb_reset_mac_len(skb); + set_hsr_stag_path(hsr_stag, (hsr->prot_version ?
0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); From ccff4fe5c5cd8cff22d3fa6ceb9c6eb09f552af3 Mon Sep 17 00:00:00 2001 From: Felix Maurer Date: Tue, 11 Nov 2025 17:29:33 +0100 Subject: [PATCH 854/867] hsr: Follow standard for HSRv0 supervision frames For HSRv0, the path_id has the following meaning: - 0000: PRP supervision frame - 0001-1001: HSR ring identifier - 1010-1011: Frames from PRP network (A/B, with RedBoxes) - 1111: HSR supervision frame Follow the IEC 62439-3:2010 standard more closely by setting the right path_id for HSRv0 supervision frames (actually, it is correctly set when the frame is constructed, but hsr_set_path_id() overwrites it) and set a fixed HSR ring identifier of 1. The ring identifier seems to be generally unused and we ignore it anyways on reception, but some fixed identifier is definitely better than using one identifier in one direction and a wrong identifier in the other. This was also the behavior before commit f266a683a480 ("net/hsr: Better frame dispatch") which introduced the alternating path_id. This was later moved to hsr_set_path_id() in commit 451d8123f897 ("net: prp: add packet handling support"). The IEC 62439-3:2010 also contains 6 unused bytes after the MacAddressA in the HSRv0 supervision frames. Adjust a TODO comment accordingly. Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Fixes: 451d8123f897 ("net: prp: add packet handling support") Signed-off-by: Felix Maurer Signed-off-by: NipaLocal --- net/hsr/hsr_device.c | 2 +- net/hsr/hsr_forward.c | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 1235abb2d79fa..492cbc78ab75a 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -337,7 +337,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag->tlv.HSR_TLV_type = type; - /* TODO: Why 12 in HSRv0? 
*/ + /* HSRv0 has 6 unused bytes after the MAC */ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? sizeof(struct hsr_sup_payload) : 12; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index c67c0d35921de..339f0d2202129 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -262,15 +262,23 @@ static struct sk_buff *prp_fill_rct(struct sk_buff *skb, return skb; } -static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, +static void hsr_set_path_id(struct hsr_frame_info *frame, + struct hsr_ethhdr *hsr_ethhdr, struct hsr_port *port) { int path_id; - if (port->type == HSR_PT_SLAVE_A) - path_id = 0; - else - path_id = 1; + if (port->hsr->prot_version) { + if (port->type == HSR_PT_SLAVE_A) + path_id = 0; + else + path_id = 1; + } else { + if (frame->is_supervision) + path_id = 0xf; + else + path_id = 1; + } set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); } @@ -304,7 +312,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, else hsr_ethhdr = (struct hsr_ethhdr *)pc; - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; @@ -330,7 +338,7 @@ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); /* set the lane id properly */ - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); return skb_clone(frame->skb_hsr, GFP_ATOMIC); } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { return skb_clone(frame->skb_std, GFP_ATOMIC); From 703d6c1b560cab9a46426b4dd5839dea91fd044c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 11 May 2025 19:46:34 -0700 Subject: [PATCH 855/867] nipa: disable random kunit tests Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- drivers/clk/Kconfig | 4 ---- drivers/firewire/Kconfig | 5 ----- 
drivers/firmware/cirrus/Kconfig | 1 - drivers/fpga/tests/Kconfig | 1 - drivers/gpu/drm/vc4/Kconfig | 1 - drivers/gpu/drm/xe/Kconfig.debug | 1 - drivers/hid/Kconfig | 1 - fs/ext4/Kconfig | 1 - fs/fat/Kconfig | 1 - sound/soc/codecs/Kconfig | 1 - tools/testing/kunit/configs/all_tests.config | 16 ---------------- 11 files changed, 33 deletions(-) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 3a1611008e48e..7fd4192302c94 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -555,7 +555,6 @@ source "drivers/clk/zynqmp/Kconfig" config CLK_KUNIT_TEST tristate "Basic Clock Framework Kunit Tests" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS select DTC help Kunit tests for the common clock framework. @@ -563,7 +562,6 @@ config CLK_KUNIT_TEST config CLK_FIXED_RATE_KUNIT_TEST tristate "Basic fixed rate clk type KUnit test" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS select DTC help KUnit tests for the basic fixed rate clk type. @@ -572,14 +570,12 @@ config CLK_GATE_KUNIT_TEST tristate "Basic gate type Kunit test" if !KUNIT_ALL_TESTS depends on KUNIT depends on !S390 - default KUNIT_ALL_TESTS help Kunit test for the basic clk gate type. config CLK_FD_KUNIT_TEST tristate "Basic fractional divider type Kunit test" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS help Kunit test for the clk-fractional-divider type. diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index a5f5e250223a1..8cddc84d4d236 100644 --- a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -21,7 +21,6 @@ config FIREWIRE config FIREWIRE_KUNIT_UAPI_TEST tristate "KUnit tests for layout of structure in UAPI" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests whether structures exposed to user space have expected layout. 
@@ -37,7 +36,6 @@ config FIREWIRE_KUNIT_UAPI_TEST config FIREWIRE_KUNIT_DEVICE_ATTRIBUTE_TEST tristate "KUnit tests for device attributes" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests for device attribute for node and unit. @@ -53,7 +51,6 @@ config FIREWIRE_KUNIT_DEVICE_ATTRIBUTE_TEST config FIREWIRE_KUNIT_PACKET_SERDES_TEST tristate "KUnit tests for packet serialization/deserialization" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests for packet serialization and deserialization. @@ -69,7 +66,6 @@ config FIREWIRE_KUNIT_PACKET_SERDES_TEST config FIREWIRE_KUNIT_SELF_ID_SEQUENCE_HELPER_TEST tristate "KUnit tests for helpers of self ID sequence" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests for helpers of self ID sequence. @@ -95,7 +91,6 @@ config FIREWIRE_OHCI config FIREWIRE_KUNIT_OHCI_SERDES_TEST tristate "KUnit tests for serialization/deserialization of data in buffers/registers" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests to check serialization and deserialization of data in buffers and registers defined in 1394 OHCI specification. diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index e3c2e38b746df..ad7055f7e48d7 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -10,7 +10,6 @@ config FW_CS_DSP_KUNIT_TEST_UTILS config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS depends on KUNIT && REGMAP && FW_CS_DSP - default KUNIT_ALL_TESTS select FW_CS_DSP_KUNIT_TEST_UTILS help This builds KUnit tests for cs_dsp. 
diff --git a/drivers/fpga/tests/Kconfig b/drivers/fpga/tests/Kconfig index e4a64815f16d6..e8ded96191df9 100644 --- a/drivers/fpga/tests/Kconfig +++ b/drivers/fpga/tests/Kconfig @@ -1,7 +1,6 @@ config FPGA_KUNIT_TESTS tristate "KUnit test for the FPGA subsystem" if !KUNIT_ALL_TESTS depends on FPGA && FPGA_REGION && FPGA_BRIDGE && KUNIT=y - default KUNIT_ALL_TESTS help This builds unit tests for the FPGA subsystem diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 123ab0ce17815..3751163aa3d2d 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -43,7 +43,6 @@ config DRM_VC4_KUNIT_TEST tristate "KUnit tests for VC4" if !KUNIT_ALL_TESTS depends on DRM_VC4 && KUNIT select DRM_KUNIT_TEST_HELPERS - default KUNIT_ALL_TESTS help This builds unit tests for the VC4 DRM/KMS driver. This option is not useful for distributions or general kernels, but only for kernel diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 87902b4bd6d3b..df9ba4d9cdb51 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -76,7 +76,6 @@ config DRM_XE_DEBUG_MEM config DRM_XE_KUNIT_TEST tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS depends on DRM_XE && KUNIT && DEBUG_FS - default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m help Choose this option to allow the driver to perform selftests under diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 04420a713be08..24e8018f8a446 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1413,7 +1413,6 @@ config HID_KUNIT_TEST depends on KUNIT depends on HID_BATTERY_STRENGTH depends on HID_UCLOGIC - default KUNIT_ALL_TESTS help This builds unit tests for HID. 
This option is not useful for distributions or general kernels, but only for kernel diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 01873c2a34ad2..5bad43e85ee1a 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -77,7 +77,6 @@ config EXT4_DEBUG config EXT4_KUNIT_TESTS tristate "KUnit tests for ext4" if !KUNIT_ALL_TESTS depends on EXT4_FS && KUNIT - default KUNIT_ALL_TESTS help This builds the ext4 KUnit tests. diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index 25fae1c83725b..79c8502ba9115 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -121,7 +121,6 @@ config FAT_DEFAULT_UTF8 config FAT_KUNIT_TEST tristate "Unit Tests for FAT filesystems" if !KUNIT_ALL_TESTS depends on KUNIT && FAT_FS - default KUNIT_ALL_TESTS help This builds the FAT KUnit tests diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 160c07699a8b7..5c93033677e80 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -787,7 +787,6 @@ config SND_SOC_CS_AMP_LIB config SND_SOC_CS_AMP_LIB_TEST tristate "KUnit test for Cirrus Logic cs-amp-lib" if !KUNIT_ALL_TESTS depends on SND_SOC_CS_AMP_LIB && KUNIT - default KUNIT_ALL_TESTS help This builds KUnit tests for the Cirrus Logic common amplifier library. 
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 422e186cf3cf1..2f093048d985c 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -36,22 +36,6 @@ CONFIG_MAC80211=y CONFIG_WLAN_VENDOR_INTEL=y CONFIG_IWLWIFI=y -CONFIG_DAMON=y -CONFIG_DAMON_VADDR=y -CONFIG_DAMON_PADDR=y - CONFIG_REGMAP_BUILD=y -CONFIG_AUDIT=y - CONFIG_PRIME_NUMBERS=y - -CONFIG_SECURITY=y -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_LANDLOCK=y - -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_SOC=y -CONFIG_SND_SOC_TOPOLOGY_BUILD=y -CONFIG_SND_SOC_CS35L56_I2C=y From 808ed2d45545e0c7a7086da50cbf5c1bc9ff6927 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Jul 2025 16:44:59 -0700 Subject: [PATCH 856/867] nipa: disable 6.17's merge window kunit tests Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- kernel/irq/Kconfig | 1 - lib/crypto/tests/Kconfig | 7 ------- 2 files changed, 8 deletions(-) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 1b4254d19a73e..c49dee35fc520 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -144,7 +144,6 @@ config IRQ_KUNIT_TEST bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS depends on KUNIT=y depends on SPARSE_IRQ - default KUNIT_ALL_TESTS select IRQ_DOMAIN imply SMP help diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 578af717e13a7..18573597e2fcc 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -3,7 +3,6 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST tristate "KUnit tests for BLAKE2s" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE # No need to select CRYPTO_LIB_BLAKE2S here, as that option doesn't # exist; the BLAKE2s code is always built-in for the /dev/random driver. 
@@ -13,7 +12,6 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST config CRYPTO_LIB_CURVE25519_KUNIT_TEST tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_CURVE25519 help @@ -22,7 +20,6 @@ config CRYPTO_LIB_CURVE25519_KUNIT_TEST config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_MD5 help @@ -32,7 +29,6 @@ config CRYPTO_LIB_MD5_KUNIT_TEST config CRYPTO_LIB_POLY1305_KUNIT_TEST tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_POLY1305 help @@ -41,7 +37,6 @@ config CRYPTO_LIB_POLY1305_KUNIT_TEST config CRYPTO_LIB_SHA1_KUNIT_TEST tristate "KUnit tests for SHA-1" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA1 help @@ -53,7 +48,6 @@ config CRYPTO_LIB_SHA1_KUNIT_TEST config CRYPTO_LIB_SHA256_KUNIT_TEST tristate "KUnit tests for SHA-224 and SHA-256" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA256 help @@ -65,7 +59,6 @@ config CRYPTO_LIB_SHA256_KUNIT_TEST config CRYPTO_LIB_SHA512_KUNIT_TEST tristate "KUnit tests for SHA-384 and SHA-512" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA512 help From a5009483fa6fef493898712c3194092bab13a6cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Jul 2025 07:19:58 -0700 Subject: [PATCH 857/867] nipa: config: x86: use periodic HZ tick Let's see if this increases stability of timing-related results.. 
Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- arch/x86/configs/i386_defconfig | 2 +- arch/x86/configs/x86_64_defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 79fa38ca954d1..16a3de1c1b9a7 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2,7 +2,7 @@ CONFIG_WERROR=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_HZ_PERIODIC=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BSD_PROCESS_ACCT=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7d7310cdf8b0a..c65598526e18b 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2,7 +2,7 @@ CONFIG_WERROR=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_HZ_PERIODIC=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BSD_PROCESS_ACCT=y From de59fbcb44afc4faa7ca83ec385c0128728a26c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 17 Aug 2024 10:53:12 -0700 Subject: [PATCH 858/867] nipa: profile (time) test output Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- tools/testing/selftests/kselftest/prefix.pl | 8 ++++++++ tools/testing/selftests/kselftest/runner.sh | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl index 12a7f4ca2684d..b8e4d697e3b3a 100755 --- a/tools/testing/selftests/kselftest/prefix.pl +++ b/tools/testing/selftests/kselftest/prefix.pl @@ -4,12 +4,15 @@ # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd". 
use strict; use IO::Handle; +use Time::HiRes qw( time ); binmode STDIN; binmode STDOUT; STDOUT->autoflush(1); +my $start_time = time(); +my $prev_time = $start_time; my $needed = 1; while (1) { my $char; @@ -17,6 +20,11 @@ exit 0 if ($bytes == 0); if ($needed) { print "# "; + if ($ENV{kselftest_profile}) { + my $now = time(); + printf("%.2f [+%.2f] ", $now - $start_time, $now - $prev_time); + $prev_time = $now; + } $needed = 0; } print $char; diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 2c3c58e65a419..e65bffb9981f8 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -89,7 +89,7 @@ run_one() fi field=$(echo "$line" | cut -d= -f1) value=$(echo "$line" | cut -d= -f2-) - eval "kselftest_$field"="$value" + eval "export kselftest_$field"="$value" done < "$settings" fi From da430f4a181c129441610a330f9f6a2e692f97bf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Jun 2025 15:27:15 -0700 Subject: [PATCH 859/867] nipa: timestamp - try waking Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- tools/testing/selftests/net/txtimestamp.c | 33 ++++++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index dae91eb97d699..2a8d6a463397a 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -62,6 +62,7 @@ static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; static int cfg_poll_timeout = 100; +static bool cfg_wake_every_msec = true; static int cfg_delay_snd; static int cfg_delay_ack; static int cfg_delay_tolerance_usec = 500; @@ -286,6 +287,16 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) daddr ? 
inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); } +static int64_t get_time_now_us64(void) +{ + static struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + error(1, errno, "clock_gettime"); + + return timespec_to_us64(&ts); +} + static void __epoll(int epfd) { struct epoll_event events; @@ -300,11 +311,19 @@ static void __epoll(int epfd) static void __poll(int fd) { struct pollfd pollfd; + int64_t end_of_wait; + int timeout; int ret; - memset(&pollfd, 0, sizeof(pollfd)); - pollfd.fd = fd; - ret = poll(&pollfd, 1, cfg_poll_timeout); + timeout = cfg_wake_every_msec ? 1 : cfg_poll_timeout; + end_of_wait = get_time_now_us64() + cfg_poll_timeout * 1000; + + do { + memset(&pollfd, 0, sizeof(pollfd)); + pollfd.fd = fd; + ret = poll(&pollfd, 1, timeout); + } while (!ret && get_time_now_us64() < end_of_wait); + if (ret != 1) error(1, errno, "poll"); } @@ -707,6 +726,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" + " -s: single sleep until timeout (from -S), by default wake every 1msec\n" " -S N: usec to sleep before reading error queue\n" " -t N: tolerance (usec) for timestamp validation\n" " -u: use udp\n" @@ -723,7 +743,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNo:p:PrRsS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -787,6 +807,9 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_RAW; cfg_ipproto = IPPROTO_RAW; break; + case 's': /* sleep 'till timeout */ + cfg_wake_every_msec = false; + break; case 'S': cfg_sleep_usec = strtoul(optarg, NULL, 10); break; @@ -825,6 +848,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "cannot ask for pktinfo over pf_packet"); if (cfg_busy_poll && cfg_use_epoll) error(1, 0, "pass epoll or busy_poll, not both"); + if (cfg_wake_every_msec && cfg_use_epoll) + error(1, 0, "periodic 
wake not implemented for epoll, use -s"); if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id) error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID"); From e45b31c582e8ed5db5ea6c3955f187b37ff0b0a4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 31 Mar 2025 07:13:26 -0700 Subject: [PATCH 860/867] nipa: dbg: tests: bonding: print info on failure Signed-off-by: NipaLocal --- .../net/bonding/bond_macvlan_ipvlan.sh | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh index c4711272fe45d..c7c9c8c5f1fd5 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -30,9 +30,49 @@ check_connection() local message=${3} RET=0 + ip netns exec ${ns} nstat >/dev/null + ip netns exec ${xvlan2_ns} nstat >/dev/null ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null - check_err $? "ping failed" + R=$? 
+ N=$( ip netns exec ${ns} nstat) + N2=$(ip netns exec ${xvlan2_ns} nstat) + check_err $R "ping failed" log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" + + if [ $R -ne 0 ]; then + echo "===" + echo $O + echo + echo $N + echo + echo $N2 + echo + echo 'local' + ip link + echo + ip addr + echo + ip neigh + echo + ip route + echo 'ns' + ip -s -s -netns ${ns} link + echo + ip -netns ${ns} addr + echo + ip -netns ${ns} neigh + echo + ip -netns ${ns} route + echo 'X ns 2' + ip -s -s -netns ${xvlan2_ns} link + echo + ip -netns ${xvlan2_ns} addr + echo + ip -netns ${xvlan2_ns} neigh + echo + ip -netns ${xvlan2_ns} route + echo "=<=" + fi } xvlan_over_bond() From 8b5c3c8906b997fbc5b3dd6ceed0dc509ae00829 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 3 Nov 2024 16:10:42 -0800 Subject: [PATCH 861/867] nipa: selftests: net: enable profiling Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- tools/testing/selftests/net/settings | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index ed8418e8217a0..a38764182822e 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1,2 @@ timeout=3600 +profile=1 From 4aca4041358da30c7cf90eada7a71f30b6679d88 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 29 Aug 2024 19:57:39 -0700 Subject: [PATCH 862/867] nipa: tc_action dbg Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- .../selftests/net/forwarding/tc_actions.sh | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index ea89e558672db..5823d94b19e9b 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -223,19 +223,32 @@ mirred_egress_to_ingress_tcp_test() ip_proto icmp \ action drop - ip vrf exec v$h1 ncat 
--recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & - local rpid=$! - ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 - wait -n $rpid - cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 - check_err $? "server output check failed" + echo P2 $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \ -t icmp "ping,id=42,seq=5" -q + echo P2.1 tc_check_packets "dev $h1 egress" 101 10 check_err $? "didn't mirred redirect ICMP" + echo P2.2 tc_check_packets "dev $h1 ingress" 102 10 check_err $? "didn't drop mirred ICMP" + echo P2.3 + + echo P1 + + ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & + local rpid=$! + sleep 0.2 + echo P1.1 + ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 + echo P1.2 + sleep 0.2 + wait -n $rpid + cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 + check_err $? "server output check failed" + + echo P3 tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower @@ -359,4 +372,5 @@ else tests_run fi +dmesg exit $EXIT_STATUS From c321df2d65b8b052d5c3b139d231d6dd9d8d9817 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Aug 2025 16:39:42 -0700 Subject: [PATCH 863/867] nipa: config: disable CPU_MITIGATIONS These are unlikely to matter for CI testing and they slow things down. 
Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- arch/x86/configs/x86_64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index c65598526e18b..28d6541d1857f 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -29,6 +29,7 @@ CONFIG_KALLSYMS_ALL=y CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_SMP=y +CONFIG_CPU_MITIGATIONS=n CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y From 7bca6e41869e65b437e97b4b60e4e777e5e17838 Mon Sep 17 00:00:00 2001 From: fedora Cloud User Date: Thu, 1 Feb 2024 06:34:18 -0800 Subject: [PATCH 864/867] nipa: forwarding: set timeout to 3 hours tc_actions.sh keeps hanging the forwarding tests. sdf@: tdc & tdc-dbg started intermittently failing around Sep 25th Signed-off-by: NipaLocal --- tools/testing/selftests/net/forwarding/settings | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings index e7b9417537fbc..ff3de4936a009 100644 --- a/tools/testing/selftests/net/forwarding/settings +++ b/tools/testing/selftests/net/forwarding/settings @@ -1 +1,2 @@ -timeout=0 +timeout=10800 +profile=1 From 8e29db4d50655819b6a86e9ed83f7e6e7219169c Mon Sep 17 00:00:00 2001 From: NipaLocal Date: Tue, 11 Mar 2025 22:49:47 -0700 Subject: [PATCH 865/867] nipa: drv: net: add timeout Signed-off-by: NipaLocal --- tools/testing/selftests/drivers/net/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/drivers/net/settings diff --git a/tools/testing/selftests/drivers/net/settings b/tools/testing/selftests/drivers/net/settings new file mode 100644 index 0000000000000..a953c96aa16e1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/settings @@ -0,0 +1 @@ +timeout=180 From dba6633847555dae1abbe563e1c51d457c37d2f0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date:
Wed, 30 Jul 2025 07:17:43 -0700 Subject: [PATCH 866/867] nipa: config: x86: disable GPUs and sound We exclusively use headless VMs today, don't waste time compiling sound and GPU drivers. Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- arch/x86/configs/i386_defconfig | 13 ------------- arch/x86/configs/x86_64_defconfig | 13 ------------- 2 files changed, 26 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 16a3de1c1b9a7..652896207f2f0 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -190,19 +190,6 @@ CONFIG_HPET=y # CONFIG_HPET_MMAP is not set CONFIG_I2C_I801=y CONFIG_WATCHDOG=y -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -CONFIG_DRM=y -CONFIG_DRM_I915=y -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_HRTIMER=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_SEQ_DUMMY=y -CONFIG_SND_HDA_INTEL=y -CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y CONFIG_HID_GYRATION=y CONFIG_HID_NTRIG=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 28d6541d1857f..d9e4feeb5a3d5 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -188,19 +188,6 @@ CONFIG_HPET=y # CONFIG_HPET_MMAP is not set CONFIG_I2C_I801=y CONFIG_WATCHDOG=y -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -CONFIG_DRM=y -CONFIG_DRM_I915=y -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_HRTIMER=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_SEQ_DUMMY=y -CONFIG_SND_HDA_INTEL=y -CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y CONFIG_HID_GYRATION=y CONFIG_HID_NTRIG=y From 6cf0a45538e8fff9f0d42fda8a88674838ec45cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 24 Jul 2025 06:47:48 -0700 Subject: [PATCH 867/867] nipa: config: disable kmemleak auto scan kmemleak auto scan could be a source of latency for the tests. We run a full scan after the tests manually, we don't need the autoscan thread to be enabled. 
Signed-off-by: Jakub Kicinski Signed-off-by: NipaLocal --- kernel/configs/debug.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index e81327d2cd639..84fcc67875fdc 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -58,7 +58,7 @@ CONFIG_DEBUG_NET=y CONFIG_PAGE_EXTENSION=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y +CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=n CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 CONFIG_DEBUG_OBJECTS_FREE=y