Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DASH HA session API design. #532

Merged
merged 22 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Accton
ACK
Ack
ack
ack'ed
acl
ACL
ACLs
Expand Down Expand Up @@ -304,6 +305,7 @@ IxLoad
ixload
IxNetwork
IxNetworkWeb
Jiang
Jinja
jitter
journaled
Expand Down Expand Up @@ -372,6 +374,7 @@ NonSynStateful
NorthBound
Novus
NPL
NPU
NPUS
NSG
NSGs
Expand Down Expand Up @@ -471,11 +474,13 @@ README
READMEs
README's
reconvergence
RECV
RedirectRuleResimulatedUf
redis
renderer
repo
repos
REQ
resimulated
resimulation
responder
Expand Down Expand Up @@ -534,6 +539,7 @@ SmartAppliances
SmartNIC
SmartNic
SmartNICs
SmartSwitch
SmartSwitches
snappi
SNAT
Expand Down
2 changes: 1 addition & 1 deletion dash-pipeline/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ sai-submodule:
# P4 Source code compile TARGETS
######################################

P4_SRC=$(wildcard bmv2/*.p4)
# GNU make's $(wildcard) has no recursive "**": "bmv2/**/*.p4" only matches
# files exactly one directory below bmv2/. List the top level explicitly too so
# that edits to bmv2/*.p4 still trigger a rebuild of the P4 artifacts.
P4_SRC=$(wildcard bmv2/*.p4 bmv2/**/*.p4)
P4_MAIN=bmv2/dash_pipeline.p4
P4_OUTDIR=bmv2/dash_pipeline.bmv2
P4_ARTIFACTS=$(P4_OUTDIR)/dash_pipeline.json $(P4_OUTDIR)/dash_pipeline_p4rt.txt
Expand Down
8 changes: 8 additions & 0 deletions dash-pipeline/bmv2/dash_arch_specific.p4
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,12 @@

#endif // TARGET_DPDK_PNA

//
// Utility macros
//

// Two-level stringification helpers.
// PP_STR_RAW(x) turns its argument into a string literal exactly as written.
// PP_STR(x) forwards to PP_STR_RAW so that x is macro-expanded first and the
// expanded text is what gets stringified.
#define PP_STR_RAW(x) #x
#define PP_STR(x) PP_STR_RAW(x)

#endif // __DASH_TARGET_SPECIFIC__
72 changes: 70 additions & 2 deletions dash-pipeline/bmv2/dash_metadata.p4
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

#include "dash_headers.p4"

#define MAX_ENI 64
#define MAX_HA_SET 1

enum bit<32> dash_routing_actions_t {
NONE = 0,
STATIC_ENCAP = (1 << 0),
Expand All @@ -14,7 +17,21 @@ enum bit<16> dash_direction_t {
INVALID = 0,
OUTBOUND = 1,
INBOUND = 2
}
};

// Where a packet came from. Stored in metadata_t.packet_source.
enum bit<8> dash_packet_source_t {
EXTERNAL = 0, // Packets from external sources.
DPAPP = 1, // Packets from data plane app.
PEER = 2 // Packets from the paired DPU.
};

// Kind of packet being processed. Stored in metadata_t.packet_type.
enum bit<8> dash_packet_type_t {
REGULAR = 0, // Regular packets from external sources.
FLOW_SYNC_REQ = 1, // Flow sync request packet.
FLOW_SYNC_ACK = 2, // Flow sync ack packet.
DP_PROBE_REQ = 3, // Data plane probe packet.
DP_PROBE_ACK = 4 // Data plane probe ack packet.
};

// Pipeline stages:
enum bit<16> dash_pipeline_stage_t {
Expand All @@ -30,7 +47,7 @@ enum bit<16> dash_pipeline_stage_t {

// Common stages
ROUTING_ACTION_APPLY = 300
}
};

struct conntrack_data_t {
bool allow_in;
Expand Down Expand Up @@ -76,7 +93,55 @@ struct overlay_rewrite_data_t {
IPv6Address dip_mask;
}

// HA roles.
// Stored in ha_data_t.ha_role; the value is supplied through the
// set_ha_scope_attr action in stages/ha.p4.
// NOTE(review): the exact meaning of each role is not defined in this file —
// confirm against the HA design document before relying on it.
enum bit<8> dash_ha_role_t {
DEAD = 0,
ACTIVE = 1,
STANDBY = 2,
STANDALONE = 3,
SWITCHING_TO_ACTIVE = 4
};

// Flow sync state.
// Type of ha_data_t.flow_sync_state.
enum bit<8> dash_ha_flow_sync_state_t {
FLOW_MISS = 0, // Flow not created yet
FLOW_CREATED = 1, // Flow is created but not synced or waiting for ack
FLOW_SYNCED = 2, // Flow has been synced to its peer
FLOW_PENDING_DELETE = 3, // Flow is pending deletion, waiting for ack
FLOW_PENDING_RESIMULATION = 4 // Flow is marked as pending resimulation
};

// HA flow sync operations.
// The three operations mirror the create/update/delete groups of flow sync
// request counters declared in stages/ha.p4.
// NOTE(review): no code visible here consumes this enum yet — confirm where it
// is meant to be used.
enum bit<8> dash_ha_flow_sync_op_t {
FLOW_CREATE = 0, // New flow creation.
FLOW_UPDATE = 1, // Flow resimulation or any other reason causing existing flow to be updated.
FLOW_DELETE = 2 // Flow deletion.
};

// HA context carried with each packet; embedded in metadata_t as the `ha` field.
struct ha_data_t {
// HA scope settings
// ha_scope_id comes from the ENI (set_eni_attrs); ha_set_id and ha_role are
// written by set_ha_scope_attr in stages/ha.p4.
bit<16> ha_scope_id;
bit<16> ha_set_id;
dash_ha_role_t ha_role;

// HA set settings
// set_ha_set_attr in stages/ha.p4 fills in the peer and data plane channel fields.
bit<1> local_ip_is_v6;
IPv4ORv6Address local_ip;
bit<1> peer_ip_is_v6;
IPv4ORv6Address peer_ip;
bit<16> dp_channel_dst_port;
bit<16> dp_channel_src_port_min;
bit<16> dp_channel_src_port_max;

// HA packet/flow state
// NOTE(review): no visible code assigns this yet; the HA state machine in
// stages/ha.p4 is still marked TODO.
dash_ha_flow_sync_state_t flow_sync_state;
}

struct metadata_t {
// Packet type
dash_packet_source_t packet_source; // TODO: Parse packet source in parser.
dash_packet_type_t packet_type; // TODO: Parse packet type in parser.

// Lookup context
dash_direction_t direction;
EthernetAddress eni_addr;
Expand Down Expand Up @@ -112,6 +177,9 @@ struct metadata_t {
bool is_fast_path_icmp_flow_redirection_packet;
bit<1> fast_path_icmp_flow_redirection_disabled;

// HA
ha_data_t ha;

// Stage transition control
dash_pipeline_stage_t target_stage;

Expand Down
31 changes: 26 additions & 5 deletions dash-pipeline/bmv2/dash_pipeline.p4
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@
#include "dash_conntrack.p4"
#include "stages/direction_lookup.p4"
#include "stages/eni_lookup.p4"
#include "stages/ha.p4"
#include "stages/routing_action_apply.p4"
#include "stages/metering_update.p4"
#include "underlay.p4"

#define MAX_ENI 64

control dash_ingress(
inout headers_t hdr
, inout metadata_t meta
Expand Down Expand Up @@ -92,12 +91,19 @@ control dash_ingress(
meta.stage4_dash_acl_group_id = ## prefix ##_stage4_dash_acl_group_id; \
meta.stage5_dash_acl_group_id = ## prefix ##_stage5_dash_acl_group_id;

DEFINE_COUNTER(eni_lb_fast_path_icmp_in_counter, MAX_ENI, name="lb_fast_path_icmp_in", attr_type="stats", action_names="set_eni_attrs")
DEFINE_COUNTER(eni_rx_counter, MAX_ENI, name="rx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_tx_counter, MAX_ENI, name="tx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_outbound_rx_counter, MAX_ENI, name="outbound_rx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_outbound_tx_counter, MAX_ENI, name="outbound_tx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_inbound_rx_counter, MAX_ENI, name="inbound_rx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_inbound_tx_counter, MAX_ENI, name="inbound_tx", attr_type="stats", action_names="set_eni_attrs", order=0)
DEFINE_COUNTER(eni_lb_fast_path_icmp_in_counter, MAX_ENI, name="lb_fast_path_icmp_in", attr_type="stats", action_names="set_eni_attrs", order=0)

action set_eni_attrs(bit<32> cps,
bit<32> pps,
bit<32> flows,
bit<1> admin_state,
@SaiVal[type="sai_object_id_t"] bit<16> ha_scope_id,
@SaiVal[type="sai_ip_address_t"] IPv4Address vm_underlay_dip,
@SaiVal[type="sai_uint32_t"] bit<24> vm_vni,
@SaiVal[type="sai_object_id_t"] bit<16> vnet_id,
Expand Down Expand Up @@ -144,7 +150,8 @@ control dash_ingress(
}
meta.meter_policy_id = v4_meter_policy_id;
}


meta.ha.ha_scope_id = ha_scope_id;
meta.fast_path_icmp_flow_redirection_disabled = disable_fast_path_icmp_flow_redirection;
}

Expand Down Expand Up @@ -301,17 +308,23 @@ control dash_ingress(
if (meta.eni_data.admin_state == 0) {
deny();
}


UPDATE_COUNTER(eni_rx_counter, meta.eni_id);
if (meta.is_fast_path_icmp_flow_redirection_packet) {
UPDATE_COUNTER(eni_lb_fast_path_icmp_in_counter, meta.eni_id);
}

ha_stage.apply(hdr, meta);

acl_group.apply();

if (meta.direction == dash_direction_t.OUTBOUND) {
UPDATE_COUNTER(eni_outbound_rx_counter, meta.eni_id);

meta.target_stage = dash_pipeline_stage_t.OUTBOUND_ROUTING;
outbound.apply(hdr, meta);
} else if (meta.direction == dash_direction_t.INBOUND) {
UPDATE_COUNTER(eni_inbound_rx_counter, meta.eni_id);
inbound.apply(hdr, meta);
}

Expand Down Expand Up @@ -339,6 +352,14 @@ control dash_ingress(

if (meta.dropped) {
drop_action();
} else {
UPDATE_COUNTER(eni_tx_counter, meta.eni_id);

if (meta.direction == dash_direction_t.OUTBOUND) {
UPDATE_COUNTER(eni_outbound_tx_counter, meta.eni_id);
} else if (meta.direction == dash_direction_t.INBOUND) {
UPDATE_COUNTER(eni_inbound_tx_counter, meta.eni_id);
}
}
}
}
Expand Down
133 changes: 133 additions & 0 deletions dash-pipeline/bmv2/stages/ha.p4
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#ifndef _DASH_STAGE_HA_P4_
#define _DASH_STAGE_HA_P4_

control ha_stage(inout headers_t hdr,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@r12f, as discussed, can we add a capability attribute for DPU-scope versus ENI-scope HA?

Copy link
Collaborator Author

@r12f r12f Mar 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Mukesh, sure thing! And as we chatted, it will be better to add it in another PR.

The DPU HA capability will make more sense to be added along with the DPU API or a dedicated one, if needed. Putting the DPU HA capability into ENI HA API will be really weird...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, let me get some capabilities in there; we can probably model this in a generic way.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @mukeshmv , I have added the capabilities in the PR :D. Feel free to help take a look!

inout metadata_t meta)
{
//
// ENI-level flow operation counters:
// Each counter is sized by MAX_ENI and is attached to set_eni_attrs as a
// "stats" attribute through the action_names/attr_type arguments.
// NOTE(review): DEFINE_HIT_COUNTER and DEFINE_COUNTER are defined outside this
// file; what each one counts (hits vs. packets/bytes) should be confirmed there.
//
DEFINE_HIT_COUNTER(flow_created_counter, MAX_ENI, name="flow_created", attr_type="stats", action_names="set_eni_attrs", order=1)
DEFINE_HIT_COUNTER(flow_create_failed_counter, MAX_ENI, name="flow_create_failed", attr_type="stats", action_names="set_eni_attrs", order=1)
DEFINE_HIT_COUNTER(flow_updated_counter, MAX_ENI, name="flow_updated", attr_type="stats", action_names="set_eni_attrs", order=1)
DEFINE_HIT_COUNTER(flow_update_failed_counter, MAX_ENI, name="flow_update_failed", attr_type="stats", action_names="set_eni_attrs", order=1)
DEFINE_HIT_COUNTER(flow_deleted_counter, MAX_ENI, name="flow_deleted", attr_type="stats", action_names="set_eni_attrs", order=1)
DEFINE_HIT_COUNTER(flow_delete_failed_counter, MAX_ENI, name="flow_delete_failed", attr_type="stats", action_names="set_eni_attrs", order=1)
DEFINE_HIT_COUNTER(flow_aged_counter, MAX_ENI, name="flow_aged", attr_type="stats", action_names="set_eni_attrs", order=1)

//
// ENI-level flow sync packet counters:
// One rx/tx pair for inline sync and one for timed sync, all declared with order=2.
//
DEFINE_COUNTER(inline_sync_packet_rx_counter, MAX_ENI, name="inline_sync_packet_rx", attr_type="stats", action_names="set_eni_attrs", order=2)
DEFINE_COUNTER(inline_sync_packet_tx_counter, MAX_ENI, name="inline_sync_packet_tx", attr_type="stats", action_names="set_eni_attrs", order=2)
DEFINE_COUNTER(timed_sync_packet_rx_counter, MAX_ENI, name="timed_sync_packet_rx", attr_type="stats", action_names="set_eni_attrs", order=2)
DEFINE_COUNTER(timed_sync_packet_tx_counter, MAX_ENI, name="timed_sync_packet_tx", attr_type="stats", action_names="set_eni_attrs", order=2)

//
// ENI-level flow sync request counters:
// - Depending on the implementation, flow sync requests could be batched, hence they need to be tracked separately.
// - The counters are defined as a combination of the following things:
//   - 3 flow sync operations: create, update, delete.
//   - 2 ways of sync: inline sync and timed sync.
//   - Request result: succeeded, failed (unexpected) and ignored (expected and ok to ignore, e.g., more packets arrive before the flow sync is acked).
//
// DEFINE_ENI_FLOW_SYNC_COUNTERS(counter_name) declares, for one flow sync
// operation, 7 inline_* and 7 timed_* hit counters. The counter identifier is
// built by token pasting and the exported name is the same identifier without
// the trailing "_counter" (stringified through PP_STR), except that the
// ack_failed/ack_ignored counters are exported with an extra "_recv" suffix.
// Each exported name must be unique: the *_req_ignored counters are published
// as "..._req_ignored" so they do not collide with the *_req_failed counters.
//
#define DEFINE_ENI_FLOW_SYNC_COUNTERS(counter_name) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_sent_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_sent), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_recv_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_failed_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_ignored_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_recv_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_failed_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_failed_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_ignored_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_ignored_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_sent_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_sent), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_recv_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_failed_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_ignored_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_recv_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_failed_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_failed_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
    DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_ignored_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_ignored_recv), attr_type="stats", action_names="set_eni_attrs", order=2)

DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_create)
DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_update)
DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_delete)

//
// HA scope:
//
// Action of the ha_scope table: copies the HA scope's settings into meta.ha.
//   ha_set_id    - id of the HA set this scope belongs to; later used as the
//                  key of the ha_set table.
//   dash_ha_role - HA role to record for this scope.
//   flow_version - declared read-only for SAI; not used by the action body.
action set_ha_scope_attr(
    @SaiVal[type="sai_object_id_t"] bit<16> ha_set_id,
    @SaiVal[type="sai_dash_ha_role_t"] dash_ha_role_t dash_ha_role,
    @SaiVal[isreadonly="true"] bit<32> flow_version
) {
    meta.ha.ha_set_id = ha_set_id;
    meta.ha.ha_role = dash_ha_role;
}

// HA scope table: exact match on meta.ha.ha_scope_id. Its only action,
// set_ha_scope_attr, records the HA set id and HA role in meta.ha.
@SaiTable[api = "dash_ha", order=1, isobject="true"]
table ha_scope {
key = {
meta.ha.ha_scope_id : exact;
}
actions = {
set_ha_scope_attr;
}
}

//
// HA set:
//
// Data plane probe counters for the HA set. Each one is sized by MAX_HA_SET
// and attached to set_ha_set_attr as a "stats" attribute.
// NOTE(review): unlike the ENI counters in this file, these identifiers carry
// no "_counter" suffix — confirm whether that is intentional.
DEFINE_COUNTER(dp_probe_req_rx, MAX_HA_SET, name="dp_probe_req_rx", attr_type="stats", action_names="set_ha_set_attr")
DEFINE_COUNTER(dp_probe_req_tx, MAX_HA_SET, name="dp_probe_req_tx", attr_type="stats", action_names="set_ha_set_attr")
DEFINE_COUNTER(dp_probe_ack_rx, MAX_HA_SET, name="dp_probe_ack_rx", attr_type="stats", action_names="set_ha_set_attr")
DEFINE_COUNTER(dp_probe_ack_tx, MAX_HA_SET, name="dp_probe_ack_tx", attr_type="stats", action_names="set_ha_set_attr")
DEFINE_HIT_COUNTER(dp_probe_failed, MAX_HA_SET, name="dp_probe_failed", attr_type="stats", action_names="set_ha_set_attr")

// Action of the ha_set table: copies the HA set's settings into meta.ha.
//   local_ip_is_v6 / local_ip - local address and its address family flag.
//   peer_ip_is_v6 / peer_ip   - peer address and its address family flag.
//   dp_channel_dst_port       - data plane channel destination port.
//   dp_channel_min_src_port,
//   dp_channel_max_src_port   - data plane channel source port range.
//   dp_channel_probe_interval_ms,
//   dp_channel_probe_fail_threshold,
//   dp_channel_is_alive       - accepted as attributes but not used by the
//                               action body (dp_channel_is_alive is read-only).
action set_ha_set_attr(
    bit<1> local_ip_is_v6,
    @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address local_ip,
    bit<1> peer_ip_is_v6,
    @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address peer_ip,
    bit<16> dp_channel_dst_port,
    bit<16> dp_channel_min_src_port,
    bit<16> dp_channel_max_src_port,
    bit<32> dp_channel_probe_interval_ms,
    bit<32> dp_channel_probe_fail_threshold,
    @SaiVal[isreadonly="true"] bit<1> dp_channel_is_alive
) {
    // ha_data_t declares local_ip fields next to the peer ones; store both
    // ends so the local address is not silently dropped.
    meta.ha.local_ip_is_v6 = local_ip_is_v6;
    meta.ha.local_ip = local_ip;
    meta.ha.peer_ip_is_v6 = peer_ip_is_v6;
    meta.ha.peer_ip = peer_ip;

    meta.ha.dp_channel_dst_port = dp_channel_dst_port;
    meta.ha.dp_channel_src_port_min = dp_channel_min_src_port;
    meta.ha.dp_channel_src_port_max = dp_channel_max_src_port;
}

// HA set table: exact match on meta.ha.ha_set_id (exposed to SAI as an object
// id). Its only action, set_ha_set_attr, copies the HA set settings into meta.ha.
@SaiTable[api = "dash_ha", order=0, isobject="true"]
table ha_set {
key = {
meta.ha.ha_set_id : exact @SaiVal[type="sai_object_id_t"];
}
actions = {
set_ha_set_attr;
}
}

apply {
    // A zero HA scope id means HA is not enabled: skip the whole stage.
    if (meta.ha.ha_scope_id != 0) {
        ha_scope.apply();

        // A zero HA set id likewise means HA is not enabled, so the HA set
        // lookup is skipped.
        if (meta.ha.ha_set_id != 0) {
            ha_set.apply();

            // TODO: HA state machine handling.
        }
    }
}
}

#endif /* _DASH_STAGE_HA_P4_ */
Loading
Loading