Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cherry-picks from upstream on v1.14.14 for conformance test fix #20

Open
wants to merge 4 commits into
base: dhij/v1.14.14-conformance
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-agent.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium_service_update.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions Documentation/network/kubernetes/kubeproxy-free.rst
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,14 @@ In this example, services with port ``31940`` were created (one for each of devi
$ kubectl -n kube-system exec ds/cilium -- cilium service list
ID Frontend Service Type Backend
[...]
4 10.104.239.135:80 ClusterIP 1 => 10.217.0.107:80
2 => 10.217.0.149:80
5 0.0.0.0:31940 NodePort 1 => 10.217.0.107:80
2 => 10.217.0.149:80
6 192.168.178.29:31940 NodePort 1 => 10.217.0.107:80
2 => 10.217.0.149:80
7 172.16.0.29:31940 NodePort 1 => 10.217.0.107:80
2 => 10.217.0.149:80
4 10.104.239.135:80/TCP ClusterIP 1 => 10.217.0.107:80/TCP
2 => 10.217.0.149:80/TCP
5 0.0.0.0:31940/TCP NodePort 1 => 10.217.0.107:80/TCP
2 => 10.217.0.149:80/TCP
6 192.168.178.29:31940/TCP NodePort 1 => 10.217.0.107:80/TCP
2 => 10.217.0.149:80/TCP
7 172.16.0.29:31940/TCP NodePort 1 => 10.217.0.107:80/TCP
2 => 10.217.0.149:80/TCP

Create a variable with the node port for testing:

Expand Down
3 changes: 3 additions & 0 deletions api/v1/models/backend_address.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions api/v1/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2863,6 +2863,9 @@ definitions:
ip:
description: Layer 3 address
type: string
protocol:
description: Layer 4 protocol (TCP, UDP, etc)
type: string
port:
description: Layer 4 port number
type: integer
Expand Down
8 changes: 8 additions & 0 deletions api/v1/server/embedded_spec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 75 additions & 19 deletions bpf/bpf_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ static __always_inline __maybe_unused bool is_v6_loopback(const union v6addr *da
return ipv6_addr_equals(&loopback, daddr);
}

/* Hack due to missing narrow ctx access.
 *
 * Evaluates to the ->protocol field of the given socket context as a __u8:
 * the field value is first cast to a volatile __u32 and only then truncated
 * to 8 bits.
 * NOTE(review): the intermediate volatile __u32 cast is presumably there to
 * keep the compiler from narrowing the context load itself - confirm.
 */
#define ctx_protocol(__ctx) ((__u8)(volatile __u32)(__ctx)->protocol)

/* Hack due to missing narrow ctx access. */
static __always_inline __maybe_unused __be16
ctx_dst_port(const struct bpf_sock_addr *ctx)
Expand Down Expand Up @@ -104,12 +107,12 @@ bool sock_is_health_check(struct bpf_sock_addr *ctx __maybe_unused)
static __always_inline __maybe_unused
__u64 sock_select_slot(struct bpf_sock_addr *ctx)
{
return ctx->protocol == IPPROTO_TCP ?
return ctx_protocol(ctx) == IPPROTO_TCP ?
get_prandom_u32() : sock_local_cookie(ctx);
}

static __always_inline __maybe_unused
bool sock_proto_enabled(__u32 proto)
bool sock_proto_enabled(__u8 proto)
{
switch (proto) {
case IPPROTO_TCP:
Expand Down Expand Up @@ -294,10 +297,14 @@ static __always_inline int __sock4_xlate_fwd(struct bpf_sock_addr *ctx,
struct lb4_backend *backend;
struct lb4_service *svc;
__u16 dst_port = ctx_dst_port(ctx);
__u8 protocol = ctx_protocol(ctx);
__u32 dst_ip = ctx->user_ip4;
struct lb4_key key = {
.address = dst_ip,
.dport = dst_port,
#if defined(ENABLE_SERVICE_PROTOCOL_DIFFERENTIATION)
.proto = protocol,
#endif
}, orig_key = key;
struct lb4_service *backend_slot;
bool backend_from_affinity = false;
Expand All @@ -309,16 +316,21 @@ static __always_inline int __sock4_xlate_fwd(struct bpf_sock_addr *ctx,
if (is_defined(ENABLE_SOCKET_LB_HOST_ONLY) && !in_hostns)
return -ENXIO;

if (!udp_only && !sock_proto_enabled(ctx->protocol))
if (!udp_only && !sock_proto_enabled(protocol))
return -ENOTSUP;

/* In case a direct match fails, we try to look-up surrogate
* service entries via wildcarded lookup for NodePort and
* HostPort services.
*/
svc = lb4_lookup_service(&key, true, false);
if (!svc)
if (!svc) {
/* Restore the original key's protocol as lb4_lookup_service
* has overwritten it.
*/
lb4_key_set_protocol(&key, protocol);
svc = sock4_wildcard_lookup_full(&key, in_hostns);
}
if (!svc)
return -ENXIO;
if (svc->count == 0 && !lb4_svc_is_l7loadbalancer(svc))
Expand Down Expand Up @@ -477,23 +489,32 @@ int cil_sock4_connect(struct bpf_sock_addr *ctx)
static __always_inline int __sock4_post_bind(struct bpf_sock *ctx,
struct bpf_sock *ctx_full)
{
__u8 protocol = ctx_protocol(ctx);
struct lb4_service *svc;
struct lb4_key key = {
.address = ctx->src_ip4,
.dport = ctx_src_port(ctx),
#if defined(ENABLE_SERVICE_PROTOCOL_DIFFERENTIATION)
.proto = protocol,
#endif
};

if (!sock_proto_enabled(ctx->protocol) ||
if (!sock_proto_enabled(protocol) ||
!ctx_in_hostns(ctx_full, NULL))
return 0;

svc = lb4_lookup_service(&key, true, false);
if (!svc)
if (!svc) {
/* Perform a wildcard lookup for the case where the caller
* tries to bind to loopback or an address with host identity
* (without remote hosts).
*
* Restore the original key's protocol as lb4_lookup_service
* has overwritten it.
*/
lb4_key_set_protocol(&key, protocol);
svc = sock4_wildcard_lookup(&key, false, false, true);
}

/* If the sockaddr of this socket overlaps with a NodePort,
* LoadBalancer or ExternalIP service. We must reject this
Expand Down Expand Up @@ -540,7 +561,7 @@ static __always_inline int __sock4_pre_bind(struct bpf_sock_addr *ctx,
.peer = {
.address = ctx->user_ip4,
.port = ctx_dst_port(ctx),
.proto = (__u8)ctx->protocol,
.proto = ctx_protocol(ctx),
},
};
int ret;
Expand All @@ -556,7 +577,7 @@ int cil_sock4_pre_bind(struct bpf_sock_addr *ctx)
{
int ret = SYS_PROCEED;

if (!sock_proto_enabled(ctx->protocol) ||
if (!sock_proto_enabled(ctx_protocol(ctx)) ||
!ctx_in_hostns(ctx, NULL))
return ret;
if (sock_is_health_check(ctx) &&
Expand All @@ -573,6 +594,7 @@ static __always_inline int __sock4_xlate_rev(struct bpf_sock_addr *ctx,
{
struct ipv4_revnat_entry *val;
__u16 dst_port = ctx_dst_port(ctx);
__u8 protocol = ctx_protocol(ctx);
__u32 dst_ip = ctx->user_ip4;
struct ipv4_revnat_tuple key = {
.cookie = sock_local_cookie(ctx_full),
Expand All @@ -588,12 +610,20 @@ static __always_inline int __sock4_xlate_rev(struct bpf_sock_addr *ctx,
struct lb4_key svc_key = {
.address = val->address,
.dport = val->port,
#if defined(ENABLE_SERVICE_PROTOCOL_DIFFERENTIATION)
.proto = protocol,
#endif
};

svc = lb4_lookup_service(&svc_key, true, false);
if (!svc)
if (!svc) {
/* Restore the original key's protocol as lb4_lookup_service
* has overwritten it.
*/
lb4_key_set_protocol(&svc_key, protocol);
svc = sock4_wildcard_lookup_full(&svc_key,
ctx_in_hostns(ctx_full, NULL));
}
if (!svc || svc->rev_nat_index != val->rev_nat_index ||
(svc->count == 0 && !lb4_svc_is_l7loadbalancer(svc))) {
map_delete_elem(&LB4_REVERSE_NAT_SK_MAP, &key);
Expand Down Expand Up @@ -810,7 +840,7 @@ int sock6_xlate_v4_in_v6(struct bpf_sock_addr *ctx __maybe_unused,
return -ENXIO;

memset(&fake_ctx, 0, sizeof(fake_ctx));
fake_ctx.protocol = ctx->protocol;
fake_ctx.protocol = ctx_protocol(ctx);
fake_ctx.user_ip4 = addr6.p4;
fake_ctx.user_port = ctx_dst_port(ctx);

Expand Down Expand Up @@ -840,7 +870,7 @@ sock6_post_bind_v4_in_v6(struct bpf_sock *ctx __maybe_unused)
return 0;

memset(&fake_ctx, 0, sizeof(fake_ctx));
fake_ctx.protocol = ctx->protocol;
fake_ctx.protocol = ctx_protocol(ctx);
fake_ctx.src_ip4 = addr6.p4;
fake_ctx.src_port = ctx->src_port;

Expand All @@ -851,19 +881,27 @@ sock6_post_bind_v4_in_v6(struct bpf_sock *ctx __maybe_unused)

static __always_inline int __sock6_post_bind(struct bpf_sock *ctx)
{
__u8 protocol = ctx_protocol(ctx);
struct lb6_service *svc;
struct lb6_key key = {
.dport = ctx_src_port(ctx),
#if defined(ENABLE_SERVICE_PROTOCOL_DIFFERENTIATION)
.proto = protocol,
#endif
};

if (!sock_proto_enabled(ctx->protocol) ||
if (!sock_proto_enabled(protocol) ||
!ctx_in_hostns(ctx, NULL))
return 0;

ctx_get_v6_src_address(ctx, &key.address);

svc = lb6_lookup_service(&key, true, false);
if (!svc) {
/* Restore the original key's protocol as lb6_lookup_service
* has overwritten it.
*/
lb6_key_set_protocol(&key, protocol);
svc = sock6_wildcard_lookup(&key, false, false, true);
if (!svc)
return sock6_post_bind_v4_in_v6(ctx);
Expand Down Expand Up @@ -904,7 +942,7 @@ sock6_pre_bind_v4_in_v6(struct bpf_sock_addr *ctx __maybe_unused)
ctx_get_v6_address(ctx, &addr6);

memset(&fake_ctx, 0, sizeof(fake_ctx));
fake_ctx.protocol = ctx->protocol;
fake_ctx.protocol = ctx_protocol(ctx);
fake_ctx.user_ip4 = addr6.p4;
fake_ctx.user_port = ctx_dst_port(ctx);

Expand Down Expand Up @@ -935,7 +973,7 @@ static __always_inline int __sock6_pre_bind(struct bpf_sock_addr *ctx)
struct lb6_health val = {
.peer = {
.port = ctx_dst_port(ctx),
.proto = (__u8)ctx->protocol,
.proto = ctx_protocol(ctx),
},
};
int ret = 0;
Expand All @@ -957,7 +995,7 @@ int cil_sock6_pre_bind(struct bpf_sock_addr *ctx)
{
int ret = SYS_PROCEED;

if (!sock_proto_enabled(ctx->protocol) ||
if (!sock_proto_enabled(ctx_protocol(ctx)) ||
!ctx_in_hostns(ctx, NULL))
return ret;
if (sock_is_health_check(ctx) &&
Expand All @@ -978,8 +1016,12 @@ static __always_inline int __sock6_xlate_fwd(struct bpf_sock_addr *ctx,
struct lb6_backend *backend;
struct lb6_service *svc;
__u16 dst_port = ctx_dst_port(ctx);
__u8 protocol = ctx_protocol(ctx);
struct lb6_key key = {
.dport = dst_port,
#if defined(ENABLE_SERVICE_PROTOCOL_DIFFERENTIATION)
.proto = protocol,
#endif
}, orig_key;
struct lb6_service *backend_slot;
bool backend_from_affinity = false;
Expand All @@ -991,15 +1033,20 @@ static __always_inline int __sock6_xlate_fwd(struct bpf_sock_addr *ctx,
if (is_defined(ENABLE_SOCKET_LB_HOST_ONLY) && !in_hostns)
return -ENXIO;

if (!udp_only && !sock_proto_enabled(ctx->protocol))
if (!udp_only && !sock_proto_enabled(protocol))
return -ENOTSUP;

ctx_get_v6_address(ctx, &key.address);
memcpy(&orig_key, &key, sizeof(key));

svc = lb6_lookup_service(&key, true, false);
if (!svc)
if (!svc) {
/* Restore the original key's protocol as lb6_lookup_service
* has overwritten it.
*/
lb6_key_set_protocol(&key, protocol);
svc = sock6_wildcard_lookup_full(&key, in_hostns);
}
if (!svc)
return sock6_xlate_v4_in_v6(ctx, udp_only);
if (svc->count == 0 && !lb6_svc_is_l7loadbalancer(svc))
Expand Down Expand Up @@ -1144,7 +1191,7 @@ sock6_xlate_rev_v4_in_v6(struct bpf_sock_addr *ctx __maybe_unused)
return -ENXIO;

memset(&fake_ctx, 0, sizeof(fake_ctx));
fake_ctx.protocol = ctx->protocol;
fake_ctx.protocol = ctx_protocol(ctx);
fake_ctx.user_ip4 = addr6.p4;
fake_ctx.user_port = ctx_dst_port(ctx);

Expand All @@ -1167,6 +1214,7 @@ static __always_inline int __sock6_xlate_rev(struct bpf_sock_addr *ctx)
struct ipv6_revnat_tuple key = {};
struct ipv6_revnat_entry *val;
__u16 dst_port = ctx_dst_port(ctx);
__u8 protocol = ctx_protocol(ctx);

key.cookie = sock_local_cookie(ctx);
key.port = dst_port;
Expand All @@ -1181,12 +1229,20 @@ static __always_inline int __sock6_xlate_rev(struct bpf_sock_addr *ctx)
struct lb6_key svc_key = {
.address = val->address,
.dport = val->port,
#if defined(ENABLE_SERVICE_PROTOCOL_DIFFERENTIATION)
.proto = protocol,
#endif
};

svc = lb6_lookup_service(&svc_key, true, false);
if (!svc)
if (!svc) {
/* Restore the original key's protocol as lb6_lookup_service
* has overwritten it.
*/
lb6_key_set_protocol(&svc_key, protocol);
svc = sock6_wildcard_lookup_full(&svc_key,
ctx_in_hostns(ctx, NULL));
}
if (!svc || svc->rev_nat_index != val->rev_nat_index ||
(svc->count == 0 && !lb6_svc_is_l7loadbalancer(svc))) {
map_delete_elem(&LB6_REVERSE_NAT_SK_MAP, &key);
Expand Down
4 changes: 2 additions & 2 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -947,7 +947,7 @@ struct lb6_key {
union v6addr address; /* Service virtual IPv6 address */
__be16 dport; /* L4 port filter, if unset, all ports apply */
__u16 backend_slot; /* Backend iterator, 0 indicates the svc frontend */
__u8 proto; /* L4 protocol, currently not used (set to 0) */
__u8 proto; /* L4 protocol, 0 indicates any protocol */
__u8 scope; /* LB_LOOKUP_SCOPE_* for externalTrafficPolicy=Local */
__u8 pad[2];
};
Expand Down Expand Up @@ -1005,7 +1005,7 @@ struct lb4_key {
__be32 address; /* Service virtual IPv4 address */
__be16 dport; /* L4 port filter, if unset, all ports apply */
__u16 backend_slot; /* Backend iterator, 0 indicates the svc frontend */
__u8 proto; /* L4 protocol, currently not used (set to 0) */
__u8 proto; /* L4 protocol, 0 indicates any protocol */
__u8 scope; /* LB_LOOKUP_SCOPE_* for externalTrafficPolicy=Local */
__u8 pad[2];
};
Expand Down
Loading
Loading