From da5980bdaeada3ace7ba7ca58f3bec582eecf673 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 7 Mar 2024 05:35:31 +0000 Subject: [PATCH 01/22] DASH HA session API design. --- dash-pipeline/Makefile | 2 +- dash-pipeline/bmv2/dash_arch_specific.p4 | 8 + dash-pipeline/bmv2/dash_metadata.p4 | 71 ++- dash-pipeline/bmv2/dash_pipeline.p4 | 31 +- dash-pipeline/bmv2/stages/ha.p4 | 132 +++++ documentation/high-avail/ha-api-hld.md | 517 ++++++++++++++++++ .../high-avail/images/ha-bm-packet-flow.svg | 4 + .../images/ha-components-complex.svg | 4 + .../images/ha-components-dpu-level-ha.svg | 4 + .../images/ha-components-eni-level-ha.svg | 4 + 10 files changed, 769 insertions(+), 8 deletions(-) create mode 100644 dash-pipeline/bmv2/stages/ha.p4 create mode 100644 documentation/high-avail/ha-api-hld.md create mode 100644 documentation/high-avail/images/ha-bm-packet-flow.svg create mode 100644 documentation/high-avail/images/ha-components-complex.svg create mode 100644 documentation/high-avail/images/ha-components-dpu-level-ha.svg create mode 100644 documentation/high-avail/images/ha-components-eni-level-ha.svg diff --git a/dash-pipeline/Makefile b/dash-pipeline/Makefile index 5929fc840..400b4f67c 100644 --- a/dash-pipeline/Makefile +++ b/dash-pipeline/Makefile @@ -92,7 +92,7 @@ sai-submodule: # P4 Source code compile TARGETS ###################################### -P4_SRC=$(wildcard bmv2/*.p4) +P4_SRC=$(wildcard bmv2/*.p4 bmv2/*/*.p4) P4_MAIN=bmv2/dash_pipeline.p4 P4_OUTDIR=bmv2/dash_pipeline.bmv2 P4_ARTIFACTS=$(P4_OUTDIR)/dash_pipeline.json $(P4_OUTDIR)/dash_pipeline_p4rt.txt diff --git a/dash-pipeline/bmv2/dash_arch_specific.p4 b/dash-pipeline/bmv2/dash_arch_specific.p4 index 17b8c60e8..978a6ef07 100644 --- a/dash-pipeline/bmv2/dash_arch_specific.p4 +++ b/dash-pipeline/bmv2/dash_arch_specific.p4 @@ -82,4 +82,12 @@ #endif // TARGET_DPDK_PNA +// +// Utility macros +// + +// The second macro will have the value of x expanded before stringification.
+#define PP_STR_RAW(x) #x +#define PP_STR(x) PP_STR_RAW(x) + #endif // __DASH_TARGET_SPECIFIC__ diff --git a/dash-pipeline/bmv2/dash_metadata.p4 b/dash-pipeline/bmv2/dash_metadata.p4 index 900ab79bf..1882f10a4 100644 --- a/dash-pipeline/bmv2/dash_metadata.p4 +++ b/dash-pipeline/bmv2/dash_metadata.p4 @@ -3,6 +3,9 @@ #include "dash_headers.p4" +#define MAX_ENI 64 +#define MAX_HA_SET 1 + enum bit<32> dash_routing_actions_t { NONE = 0, STATIC_ENCAP = (1 << 0), @@ -14,7 +17,18 @@ enum bit<16> dash_direction_t { INVALID = 0, OUTBOUND = 1, INBOUND = 2 -} +}; + +enum bit<8> dash_packet_source_t { + EXTERNAL = 0, + DPAPP = 1 +}; + +enum bit<8> dash_packet_type_t { + REGULAR = 0, + FLOW_SYNC_REQ = 1, + FLOW_SYNC_ACK = 2 +}; // Pipeline stages: enum bit<16> dash_pipeline_stage_t { @@ -30,7 +44,7 @@ enum bit<16> dash_pipeline_stage_t { // Common stages ROUTING_ACTION_APPLY = 300 -} +}; struct conntrack_data_t { bool allow_in; @@ -75,8 +89,58 @@ struct overlay_rewrite_data_t { IPv6Address sip_mask; IPv6Address dip_mask; } + +enum bit<8> dash_ha_role_t { + DEAD = 0, + ACTIVE = 1, + STANDBY = 2, + STANDALONE = 3, + SWITCHING_TO_ACTIVE = 4 +}; + +enum bit<8> dash_ha_flow_sync_state_t { + FLOW_MISS = 0, + FLOW_CREATED = 1, + FLOW_SYNCED = 2, + FLOW_PENDING_DELETE = 3 +}; + +enum bit<8> dash_ha_flow_sync_op_t { + FLOW_CREATE = 0, + FLOW_UPDATE = 1, + FLOW_DELETE = 2 +}; + +struct ha_data_t { + // + // ENI HA settings + // + bit<16> ha_scope_id; + bit<16> ha_set_id; + dash_ha_role_t ha_role; + + // + // HA set settings + // + bit<1> local_ip_is_v6; + IPv4ORv6Address local_ip; + bit<1> peer_ip_is_v6; + IPv4ORv6Address peer_ip; + bit<16> dp_channel_dst_port; + bit<16> dp_channel_src_port_min; + bit<16> dp_channel_src_port_max; + + // + // HA packet/flow state + // + dash_ha_flow_sync_state_t flow_sync_state; +} struct metadata_t { + // Packet type + dash_packet_source_t packet_source; // TODO: Parse packet source in parser. 
+ dash_packet_type_t packet_type; // TODO: Parse packet type in parser. + // Lookup context dash_direction_t direction; EthernetAddress eni_addr; @@ -112,6 +176,9 @@ struct metadata_t { bool is_fast_path_icmp_flow_redirection_packet; bit<1> fast_path_icmp_flow_redirection_disabled; + // HA + ha_data_t ha; + // Stage transition control dash_pipeline_stage_t target_stage; diff --git a/dash-pipeline/bmv2/dash_pipeline.p4 b/dash-pipeline/bmv2/dash_pipeline.p4 index da4d6f7a7..023bcc3d7 100644 --- a/dash-pipeline/bmv2/dash_pipeline.p4 +++ b/dash-pipeline/bmv2/dash_pipeline.p4 @@ -10,12 +10,11 @@ #include "dash_conntrack.p4" #include "stages/direction_lookup.p4" #include "stages/eni_lookup.p4" +#include "stages/ha.p4" #include "stages/routing_action_apply.p4" #include "stages/metering_update.p4" #include "underlay.p4" -#define MAX_ENI 64 - control dash_ingress( inout headers_t hdr , inout metadata_t meta @@ -92,12 +91,19 @@ control dash_ingress( meta.stage4_dash_acl_group_id = ## prefix ##_stage4_dash_acl_group_id; \ meta.stage5_dash_acl_group_id = ## prefix ##_stage5_dash_acl_group_id; - DEFINE_COUNTER(eni_lb_fast_path_icmp_in_counter, MAX_ENI, name="lb_fast_path_icmp_in", attr_type="stats", action_names="set_eni_attrs") + DEFINE_COUNTER(eni_rx_counter, MAX_ENI, name="rx", attr_type="stats", action_names="set_eni_attrs", order=0) + DEFINE_COUNTER(eni_tx_counter, MAX_ENI, name="tx", attr_type="stats", action_names="set_eni_attrs", order=0) + DEFINE_COUNTER(eni_outbound_rx_counter, MAX_ENI, name="outbound_rx", attr_type="stats", action_names="set_eni_attrs", order=0) + DEFINE_COUNTER(eni_outbound_tx_counter, MAX_ENI, name="outbound_tx", attr_type="stats", action_names="set_eni_attrs", order=0) + DEFINE_COUNTER(eni_inbound_rx_counter, MAX_ENI, name="inbound_rx", attr_type="stats", action_names="set_eni_attrs", order=0) + DEFINE_COUNTER(eni_inbound_tx_counter, MAX_ENI, name="inbound_tx", attr_type="stats", action_names="set_eni_attrs", order=0) + 
DEFINE_COUNTER(eni_lb_fast_path_icmp_in_counter, MAX_ENI, name="lb_fast_path_icmp_in", attr_type="stats", action_names="set_eni_attrs", order=0) action set_eni_attrs(bit<32> cps, bit<32> pps, bit<32> flows, bit<1> admin_state, + @SaiVal[type="sai_object_id_t"] bit<16> ha_scope_id, @SaiVal[type="sai_ip_address_t"] IPv4Address vm_underlay_dip, @SaiVal[type="sai_uint32_t"] bit<24> vm_vni, @SaiVal[type="sai_object_id_t"] bit<16> vnet_id, @@ -144,7 +150,8 @@ control dash_ingress( } meta.meter_policy_id = v4_meter_policy_id; } - + + meta.ha.ha_scope_id = ha_scope_id; meta.fast_path_icmp_flow_redirection_disabled = disable_fast_path_icmp_flow_redirection; } @@ -301,17 +308,23 @@ control dash_ingress( if (meta.eni_data.admin_state == 0) { deny(); } - + + UPDATE_COUNTER(eni_rx_counter, meta.eni_id); if (meta.is_fast_path_icmp_flow_redirection_packet) { UPDATE_COUNTER(eni_lb_fast_path_icmp_in_counter, meta.eni_id); } + ha_stage.apply(hdr, meta); + acl_group.apply(); if (meta.direction == dash_direction_t.OUTBOUND) { + UPDATE_COUNTER(eni_outbound_rx_counter, meta.eni_id); + meta.target_stage = dash_pipeline_stage_t.OUTBOUND_ROUTING; outbound.apply(hdr, meta); } else if (meta.direction == dash_direction_t.INBOUND) { + UPDATE_COUNTER(eni_inbound_rx_counter, meta.eni_id); inbound.apply(hdr, meta); } @@ -339,6 +352,14 @@ control dash_ingress( if (meta.dropped) { drop_action(); + } else { + UPDATE_COUNTER(eni_tx_counter, meta.eni_id); + + if (meta.direction == dash_direction_t.OUTBOUND) { + UPDATE_COUNTER(eni_outbound_tx_counter, meta.eni_id); + } else if (meta.direction == dash_direction_t.INBOUND) { + UPDATE_COUNTER(eni_inbound_tx_counter, meta.eni_id); + } } } } diff --git a/dash-pipeline/bmv2/stages/ha.p4 b/dash-pipeline/bmv2/stages/ha.p4 new file mode 100644 index 000000000..d254c69aa --- /dev/null +++ b/dash-pipeline/bmv2/stages/ha.p4 @@ -0,0 +1,132 @@ +#ifndef _DASH_STAGE_HA_P4_ +#define _DASH_STAGE_HA_P4_ + +control ha_stage(inout headers_t hdr, + inout metadata_t meta) +{
+ // + // ENI-level flow operation counters: + // + DEFINE_HIT_COUNTER(flow_created_counter, MAX_ENI, name="flow_created", attr_type="stats", action_names="set_eni_attrs", order=1) + DEFINE_HIT_COUNTER(flow_create_failed_counter, MAX_ENI, name="flow_create_failed", attr_type="stats", action_names="set_eni_attrs", order=1) + DEFINE_HIT_COUNTER(flow_updated_counter, MAX_ENI, name="flow_updated", attr_type="stats", action_names="set_eni_attrs", order=1) + DEFINE_HIT_COUNTER(flow_update_failed_counter, MAX_ENI, name="flow_update_failed", attr_type="stats", action_names="set_eni_attrs", order=1) + DEFINE_HIT_COUNTER(flow_deleted_counter, MAX_ENI, name="flow_deleted", attr_type="stats", action_names="set_eni_attrs", order=1) + DEFINE_HIT_COUNTER(flow_delete_failed_counter, MAX_ENI, name="flow_delete_failed", attr_type="stats", action_names="set_eni_attrs", order=1) + DEFINE_HIT_COUNTER(flow_aged_counter, MAX_ENI, name="flow_aged", attr_type="stats", action_names="set_eni_attrs", order=1) + + // + // ENI-level flow sync packet counters: + // + DEFINE_COUNTER(inline_sync_packet_rx_counter, MAX_ENI, name="inline_sync_packet_rx", attr_type="stats", action_names="set_eni_attrs", order=2) + DEFINE_COUNTER(inline_sync_packet_tx_counter, MAX_ENI, name="inline_sync_packet_tx", attr_type="stats", action_names="set_eni_attrs", order=2) + DEFINE_COUNTER(timed_sync_packet_rx_counter, MAX_ENI, name="timed_sync_packet_rx", attr_type="stats", action_names="set_eni_attrs", order=2) + DEFINE_COUNTER(timed_sync_packet_tx_counter, MAX_ENI, name="timed_sync_packet_tx", attr_type="stats", action_names="set_eni_attrs", order=2) + + // + // ENI-level flow sync request counters: + // - Depends on implementations, the flow sync request could be batched, hence they need to tracked separately. + // - The counters are defined as combination of following things: + // - 3 flow sync operations: create, update, delete. + // - 2 ways of sync: Inline sync and timed sync. 
+    //     - Request result: succeeded, failed (unexpected) and ignored (expected and ok to ignore, e.g., more packets arrive before the flow sync is acked).
+    //
+    #define DEFINE_ENI_FLOW_SYNC_COUNTERS(counter_name) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_sent_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_sent), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_recv_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_failed_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _req_ignored_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _req_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_recv_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_failed_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(inline_ ## counter_name ## _ack_ignored_counter, MAX_ENI, name=PP_STR(inline_ ## counter_name ## _ack_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_sent_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_sent), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_recv_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_failed_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _req_ignored_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _req_ignored), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_recv_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_recv), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_failed_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_failed), attr_type="stats", action_names="set_eni_attrs", order=2) \
+        DEFINE_HIT_COUNTER(timed_ ## counter_name ## _ack_ignored_counter, MAX_ENI, name=PP_STR(timed_ ## counter_name ## _ack_ignored), attr_type="stats", action_names="set_eni_attrs", order=2)
+
+    DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_create)
+    DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_update)
+    DEFINE_ENI_FLOW_SYNC_COUNTERS(flow_delete)
+
+    //
+    // HA scope: maps the ENI's HA scope id to its HA set id and HA role.
+    //
+    action set_ha_scope_attr(
+        @SaiVal[type="sai_object_id_t"] bit<16> ha_set_id,
+        @SaiVal[type="sai_dash_ha_role_t"] dash_ha_role_t ha_role,
+        @SaiVal[isreadonly="true"] bit<32> flow_version // Read-only attribute; not written into metadata by this action.
+    ) {
+        meta.ha.ha_set_id = ha_set_id;
+        meta.ha.ha_role = ha_role;
+    }
+
+    @SaiTable[api = "dash_ha", order=1, isobject="true"]
+    table ha_scope {
+        key = {
+            meta.ha.ha_scope_id : exact @SaiVal[type="sai_object_id_t"];
+        }
+        actions = {
+            set_ha_scope_attr;
+        }
+    }
+
+    //
+    // HA set:
+    //
+    DEFINE_COUNTER(dp_probe_req_rx, MAX_HA_SET, name="dp_probe_req_rx", attr_type="stats", action_names="set_ha_set_attr")
+    DEFINE_COUNTER(dp_probe_req_tx, MAX_HA_SET, name="dp_probe_req_tx", attr_type="stats", action_names="set_ha_set_attr")
+    DEFINE_COUNTER(dp_probe_ack_rx, MAX_HA_SET, name="dp_probe_ack_rx", attr_type="stats", action_names="set_ha_set_attr")
+    DEFINE_COUNTER(dp_probe_ack_tx, MAX_HA_SET, name="dp_probe_ack_tx", attr_type="stats",
action_names="set_ha_set_attr") + DEFINE_HIT_COUNTER(dp_probe_failed, MAX_HA_SET, name="dp_probe_failed", attr_type="stats", action_names="set_ha_set_attr") + + action set_ha_set_attr( + bit<1> local_ip_is_v6, + @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address local_ip, + bit<1> peer_ip_is_v6, + @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address peer_ip, + bit<16> dp_channel_dst_port, + bit<16> dp_channel_src_port_min, + bit<16> dp_channel_src_port_max, + bit<32> dp_channel_probe_interval_ms, + bit<32> dp_channel_probe_fail_threshold + ) { + meta.ha.peer_ip_is_v6 = peer_ip_is_v6; + meta.ha.peer_ip = peer_ip; + + meta.ha.dp_channel_dst_port = dp_channel_dst_port; + meta.ha.dp_channel_src_port_min = dp_channel_src_port_min; + meta.ha.dp_channel_src_port_max = dp_channel_src_port_max; + } + + @SaiTable[api = "dash_ha", order=0, isobject="true"] + table ha_set { + key = { + meta.ha.ha_set_id : exact @SaiVal[type="sai_object_id_t"]; + } + actions = { + set_ha_set_attr; + } + } + + apply { + // If HA scope id is not set, then HA is not enabled. + if (meta.ha.ha_scope_id == 0) { + return; + } + ha_scope.apply(); + + // If HA set id is not set, then HA is not enabled. + if (meta.ha.ha_set_id == 0) { + return; + } + ha_set.apply(); + + // TODO: HA state machine handling. + } +} + +#endif /* _DASH_STAGE_HA_P4_ */ diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md new file mode 100644 index 000000000..57b804148 --- /dev/null +++ b/documentation/high-avail/ha-api-hld.md @@ -0,0 +1,517 @@ +# DASH High Availablility API + +| Rev | Date | Author | Change Description | +| --- | ---- | ------ | ------------------ | +| 0.1 | 03/02/2024 | Riff Jiang | Initial version | + +1. [1. Terminology](#1-terminology) +2. [2. Background](#2-background) +3. [3. Overview](#3-overview) +4. [4. SAI APIs](#4-sai-apis) + 1. [4.1. HA Set](#41-ha-set) + 2. [4.2. HA Scope](#42-ha-scope) + 3. [4.3. Flow table](#43-flow-table) + 4. [4.4. Flow](#44-flow) + 5. [4.5. 
ENI](#45-eni) + 6. [4.6. Event notifications](#46-event-notifications) + 7. [4.7. Counters](#47-counters) + 1. [4.7.1. HA set stats](#471-ha-set-stats) + 2. [4.7.2. ENI stats](#472-eni-stats) + 1. [4.7.2.1. ENI-level traffic counters](#4721-eni-level-traffic-counters) + 2. [4.7.2.2. ENI-level flow operation counters](#4722-eni-level-flow-operation-counters) + 3. [4.7.2.3. ENI-level flow sync packet counters](#4723-eni-level-flow-sync-packet-counters) + 4. [4.7.2.4. ENI-level flow sync operations counters](#4724-eni-level-flow-sync-operations-counters) +5. [5. HA in DASH behavior model](#5-ha-in-dash-behavior-model) + 1. [5.1. HA stage](#51-ha-stage) + 2. [5.2. Packet type and flow operations](#52-packet-type-and-flow-operations) + 3. [5.3. Life of the packet](#53-life-of-the-packet) +6. [6. Workflows](#6-workflows) + 1. [6.1. ENI level HA](#61-eni-level-ha) + 1. [6.1.1. HA set and ENI creation](#611-ha-set-and-eni-creation) + 2. [6.1.2. Switchover](#612-switchover) + 3. [6.1.3. Unplanned failover](#613-unplanned-failover) + 4. [6.1.4. Recover from unplanned failover](#614-recover-from-unplanned-failover) + +## 1. Terminology + +| Term | Explanation | +| ---- | ----------- | +| HA | High Availability. | +| NPU | Network Processing Unit. | +| DPU | Data Processing Unit. | +| ENI | Elastic Network Interface. | +| VIP | Virtual IP address. | + +## 2. Background + +The DASH high availability APIs are a set of APIs to support flow HA feature for DASH. It follows the [SmartSwitch high availability design](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md) and used to ensure the flow created on the active DPU can be correctly synchronized to the peered DPU. 
+ +For how the network topology is set up and how flow HA works, such as lifetime management, inline sync, bulk sync, and packet format, please refer to the [SmartSwitch high availability design](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md). In this doc, we will only focus on the design from the SAI API perspective. + +## 3. Overview + +To support the [SmartSwitch HA workflows](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md), from the SAI perspective, there are a few key components involved: + +- **HA set**: It defines the peer DPU and the data plane channel that is used for inline flow sync. +- **HA scope**: It controls the failover scope, such as the HA role (active, standby or standalone) and the expected flow version for new flows. Depending on the HA role of the ENI, the packet will be processed differently to get the flow sync'ed. +- **Flow table**: It is the container of all flow entries. It can be attached to all ENIs in a DPU or be attached to a single DPU, depending on the level at which we would like to provide flow HA, i.e. the HA scope. +- **Flow**: It is used to represent a network connection, which contains match conditions and packet transformations. In HA, each flow will have its own HA-related states, such as flow version, flow sync state, etc. +- **ENI**: In ENI-level HA, each ENI will be connected to an HA scope. + +The components are designed to be conceptually simple and reusable, hence we can use these components to support different HA setups. For example, to support the current ENI-level HA design, these components can be put together as below: + +![](./images/ha-components-eni-level-ha.svg) + +If more complex scenario support is needed, we can reuse the same components to represent them easily.
For example, here is the topology that can be used to represent DPU-level HA: + +![](./images/ha-components-dpu-level-ha.svg) + +Or, with an even more complicated topology, such as one where each ENI owns 2 dedicated flow tables and a single DPU can pair with multiple DPUs: + +![](./images/ha-components-complex.svg) + +> Note: The graphs here are meant to show the flexibility of these concepts; they do not require all the possible topologies to be supported by the DASH providers. + +## 4. SAI APIs + +To illustrate how these concepts look, their SAI API design is shown below. + +### 4.1. HA Set + +HA set is defined as a SAI object and contains the following SAI attributes: + +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_HA_SET_ATTR_LOCAL_IP | sai_ip_address_t | The IP address of the local DPU. | +| SAI_HA_SET_ATTR_PEER_IP | sai_ip_address_t | The IP address of the peer DPU. | +| SAI_HA_SET_ATTR_DP_CHANNEL_DST_PORT | sai_uint16_t | The destination port of the data plane channel. | +| SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MIN | sai_uint16_t | The minimum source port of the data plane channel. | +| SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MAX | sai_uint16_t | The maximum source port of the data plane channel. | +| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_INTERVAL_MS | sai_uint32_t | The interval of the data plane channel probe, in milliseconds. | +| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_FAIL_THRESHOLD | sai_uint32_t | The failure threshold of the data plane channel probe. | + +### 4.2. HA Scope + +HA scope is also defined as a SAI object and contains the following SAI attributes: + +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_HA_SCOPE_ATTR_HA_SET_ID | sai_object_id_t | The HA set ID for this scope. | +| SAI_HA_SCOPE_ATTR_HA_ROLE | sai_dash_ha_role_t | The HA role. | +| SAI_HA_SCOPE_ATTR_FLOW_VERSION | sai_uint32_t | The flow version for new flows. | + +### 4.3. Flow table + +HA uses the DASH flow table to achieve the flow state manipulation.
Since the flow table already provides the CRUD operations, we don't need any extra APIs from flow table. + +For more information, please refer to DASH flow API documentation. + +### 4.4. Flow + +To support HA, each flow contains the following SAI attributes: + +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_FLOW_ATTR_FLOW_VERSION | sai_uint32_t | The flow version. | +| SAI_FLOW_ATTR_FLOW_SYNC_STATE | sai_dash_ha_flow_sync_state_t | The flow sync state. | + +The flow sync state is defined as below: + +```c +typedef enum _sai_dash_ha_flow_sync_state_t +{ + SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_MISS, + SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_CREATED, + SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_SYNCED, + SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_PENDING_DELETE +} sai_dash_ha_flow_sync_state_t; +``` + +The flow sync state is a small state machine that represents if the flow is sync'ed or not, so we can make the packet processing decision accordingly to achieve HA. + +```mermaid +stateDiagram-v2 + M: FLOW_MISS + C: FLOW_CREATED + S: FLOW_SYNCED + D: FLOW_PENDING_DELETE + + M --> C: Flow created + M --> S: Flow sync request
received on
standby node + + C --> S: Flow sync ack received + S --> C: Flow resimulated + + C --> D: Flow deleted + S --> D: Flow deleted + + D --> M: Flow sync ack received +``` + + +For more information, please refer to DASH flow API documentation. + +### 4.5. ENI + +To provide the ENI-level HA control, each ENI will have the following SAI attributes: + +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_ENI_ATTR_HA_SCOPE_ID | sai_object_id_t | The HA scope ID of the ENI. | + +The HA role is defined as below: + +```c +typedef enum _sai_dash_ha_role_t +{ + SAI_DASH_HA_ROLE_DEAD, + SAI_DASH_HA_ROLE_ACTIVE, + SAI_DASH_HA_ROLE_STANDBY, + SAI_DASH_HA_ROLE_STANDALONE, + SAI_DASH_HA_ROLE_SWITCHING_TO_ACTIVE, +} sai_dash_ha_role_t; +``` + +### 4.6. Event notifications + +To receive the HA state updates from the DASH implementation, the following SAI notification attributes are added on the switch object: + +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_SWITCH_ATTR_HA_SCOPE_EVENT_NOTIFY | sai_ha_scope_event_notification_fn | The callback function for receiving events on the HA scope. | + +And the callback function and HA state changed event is defined as below: + +```c +/** + * @brief HA scope event type + */ +typedef enum _sai_ha_scope_event_t +{ + /** Moved to a new HA state. 
*/ + SAI_HA_SCOPE_STATE_CHANGED, + +} sai_ha_scope_event_t; + +/** + * @brief Notification data format received from SAI HA scope callback + * + * @count attr[attr_count] + */ +typedef struct _sai_ha_scope_event_data_t +{ + /** Event type */ + sai_ha_scope_event_t event_type; + + /** HA scope id */ + sai_object_id_t ha_scope_id; + + /** Attributes count */ + uint32_t attr_count; + + /** + * @brief Attributes + * + * @objects SAI_OBJECT_TYPE_HA_SCOPE + */ + sai_attribute_t *attr; + +} sai_ha_scope_event_data_t; + +/** + * @brief HA scope event notification + * + * Passed as a parameter into sai_initialize_switch() + * + * @count data[count] + * + * @param[in] count Number of notifications + * @param[in] data Array of HA scope events + */ +typedef void (*sai_ha_scope_event_notification_fn)( + _In_ uint32_t count, + _In_ const sai_ha_scope_event_data_t *ha_scope_event_data); +``` + +### 4.7. Counters + +To check how HA works, we will provide the following counters, which follows the [SmartSwitch HA detailed design doc](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-detailed-design.md). + +#### 4.7.1. HA set stats + +Here are the new stats we added for monitoring HA on HA set (DPU pair): + +| SAI stats name | Description | +| -------------- | ----------- | +| SAI_HA_SET_STAT_DP_PROBE_(REQ/ACK)_RX_BYTES | The bytes of data plane probes that this HA set received. | +| SAI_HA_SET_STAT_DP_PROBE_(REQ/ACK)_RX_PACKETS | The number of packets of data plane probes that this HA set received. | +| SAI_HA_SET_STAT_DP_PROBE_(REQ/ACK)_TX_BYTES | The bytes of data plane probes that this HA set sent. | +| SAI_HA_SET_STAT_DP_PROBE_(REQ/ACK)_TX_PACKETS | The number of packets of data plane probes that this HA set sent. | +| SAI_HA_SET_STAT_DP_PROBE_FAILED | The number of probes that failed. The failure rate = the number of failed probes / the number of tx packets. | + +#### 4.7.2. ENI stats + +##### 4.7.2.1. 
ENI-level traffic counters + +To monitor the traffic on ENI level, the following stats are added: + +| SAI stats name | Description | +| -------------- | ----------- | +| SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)RX_BYTES | Total bytes received on ENI (overall/outbound/inbound) pipeline. | +| SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)RX_PACKETS | Total number of packets received on ENI (overall/outbound/inbound) pipeline. | +| SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)TX_BYTES | Total bytes sent by ENI (overall/outbound/inbound) pipeline. | +| SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)TX_PACKETS | Total number of packets sent by ENI (overall/outbound/inbound) pipeline. | + +The packet size of all the counters should contain the size of both the customer packet and the encap, to reflect the real traffic size. The traffic volume of the customer packets alone is counted using metering buckets. + +##### 4.7.2.2. ENI-level flow operation counters + +Here are the new stats added for monitoring flow operations on each ENI: + +| SAI stats name | Description | +| -------------- | ----------- | +| SAI_ENI_STAT_FLOW_CREATED | Total number of flows created on ENI. | +| SAI_ENI_STAT_FLOW_CREATE_FAILED | Total number of flows that failed to be created on ENI. | +| SAI_ENI_STAT_FLOW_UPDATED | Total number of flows updated on ENI. | +| SAI_ENI_STAT_FLOW_UPDATE_FAILED | Total number of flows that failed to be updated on ENI. | +| SAI_ENI_STAT_FLOW_DELETED | Total number of flows deleted on ENI. | +| SAI_ENI_STAT_FLOW_DELETE_FAILED | Total number of flows that failed to be deleted on ENI. | +| SAI_ENI_STAT_FLOW_AGED | Total number of flows aged out on ENI. A flow being aged out doesn't mean the flow entry is deleted. It could be marked as pending deletion and get deleted later. | + +##### 4.7.2.3. ENI-level flow sync packet counters + +Here are the new stats added for monitoring flow sync packets on each ENI: + +- The flow can be sync'ed inline with the packet, or on a timer such as idle timeout.
+ +| SAI stats name | Description | +| -------------- | ----------- | +| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_RX_BYTES | The bytes of inline/timed flow sync packets received by the ENI. | +| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_RX_PACKETS | The number of inline/timed flow sync packets received by the ENI. | +| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_TX_BYTES | The bytes of inline/timed flow sync packets that this ENI sends. | +| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_TX_PACKETS | The number of inline/timed flow sync packets that this ENI sends. | + +##### 4.7.2.4. ENI-level flow sync operations counters + +Here are the new stats added for monitoring flow sync operations on each ENI: + +- The number of flow operations can be different from the number of flow sync packets. Depending on the implementation, a single flow sync packet can carry multiple flow operations. +- The flow operation could fail or be ignored by ENI. + - Failed means the packet was unexpected, and we failed to process it. + - Ignored means the packet is expected to be received, but we should ignore the flow operation inside and move on without dropping the packet. E.g., more packets arrive before the flow sync is ack'ed. + +| SAI stats name | Description | +| -------------- | ----------- | +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_REQ_SENT | The number of inline/timed flow create/update/delete requests that the ENI sent. | +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_REQ_RECV | The number of inline/timed flow create/update/delete requests that the ENI received. | +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_REQ_FAILED | The number of inline/timed flow create/update/delete requests that the ENI received but failed to process. | +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_REQ_IGNORED | The number of inline/timed flow create/update/delete requests that the ENI received but whose flow operation is processed as ignored.
| +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_ACK_RECV | The number of inline/timed flow create/update/delete acks that the ENI received. | +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_ACK_FAILED | The number of inline/timed flow create/update/delete acks that the ENI received but failed to process. | +| SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_ACK_IGNORED | The number of inline/timed flow create/update/delete acks that the ENI received but whose flow operation is processed as ignored. | + +## 5. HA in DASH behavior model + +With these concepts, we can now use them to build a pipeline with flow HA supported. + +To illustrate how HA works in DASH, we implement HA in our behavior model. However, it should only be treated as a logical example; it doesn't require the DASH providers to follow the exact same implementation. The goal is still to satisfy the requirements that are defined in [our HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md). + +> NOTE: This work is currently in progress, but how it will work is described here. + +### 5.1. HA stage + +In the DASH pipeline, the HA can be considered as a substage of the conntrack lookup or a stage that follows the conntrack lookup. + +When a packet arrives, it will: + +1. From ENI, pick up the flow table id and HA scope id. +2. From flow table, pick up the current flow info, such as whether the flow exists and is sync'ed. +3. From HA scope id, pick up the HA set id, HA role and expected flow version if it will create a new flow. +4. From HA set, pick up the peer DPU information. + +Then, based on this information, the HA stage will make a decision on whether this packet should be forwarded directly, or go through the slow path to get the flow created or updated, or be tunneled to its peer for flow sync or for sending to the active side. + +### 5.2.
Packet type and flow operations + +To support the flow HA, due to the nature of having different types of packets, such as regular packets vs flow sync packets, and different types of flow operations, such as flow creations vs flow updates, we have these things defined: + +```c +// Packet source +enum bit<8> dash_packet_source_t { + EXTERNAL = 0, + DPAPP = 1, + PEER = 2, +}; + +// Packet type +enum bit<8> dash_packet_type_t { + REGULAR = 0, + FLOW_SYNC_REQ = 1, + FLOW_SYNC_ACK = 2 +}; + +// HA flow sync operations +enum bit<8> dash_ha_flow_sync_op_t { + FLOW_CREATE = 0, + FLOW_UPDATE = 1, + FLOW_DELETE = 2 +}; +``` + +### 5.3. Life of the packet + +With these ingredients, we can enhance our current DASH behavior model to support flow HA. + +To simplify the scenario, let's say a packet arrives at the active DPU in active-standby steady state. At a high level, the packet will traverse our pipeline as below, which syncs the flow inline: + +![](./images/ha-bm-packet-flow.svg) + +1. (Green Lines) First, the packet for a new flow arrives at the active DPU. After ENI lookup, it will try to find the flow in the flow table. Due to the flow being missing, the packet will go through the rest of the pipeline and eventually be trapped into the data plane app. +2. (Black Lines in Active DPU) The data plane app will get the lookup result from the packet and insert the flow entry in not sync'ed state into the flow table using the DASH SAI flow APIs. +3. (Red Line in Active DPU) The data plane app recirculates the packet back to the pipeline and hits the flow table again. Because the flow is not sync'ed and this ENI is the active side, the HA stage will kick in and change this packet to a flow sync request packet, then forward it to its peer DPU. +4. (Yellow Lines) The flow sync packet goes to the standby side, which will also hit the flow miss, skip the HA stage and be trapped into the data plane app. +5.
(Black Line in Standby DPU) The data plane app takes the flow decision from the flow sync packet, inserts the flow entry in sync'ed state. +6. (Red Lines in Standby DPU) The data plane app recirculate the packet back to the pipeline and hit the flow table again, because the packet is a flow sync request and flow is in sync'ed state, HA stage will transform this packet into flow sync ack and sending it back to its active side. +7. (Blue Lines) The packet goes back to the active side, which will hit the flow again. Since it is flow sync ack and flow is in not-sync'ed state, the packet will be trapped into data plane app again. +8. (Black Line in Active DPU) The data plane app will update the flow sync state into sync'ed state and recirculate the packet again. +9. (Purple Lines) The final packet will be sent to the pipeline again, hit the flow entry in sync'ed state, applying all the transformation and send out to the network. + +## 6. Workflows + +### 6.1. ENI level HA + +In ENI-level HA, the HA SAI APIs are designed to act passively according to the HA control plane decision. + +To explain how HA API works from SAI API perspective, here are the examples of the common HA workflows. + +> NOTE: The following workflow only shows the sequence in which each SAI API is called. To achieve HA, besides this, we have HA control plane services that drive the HA state machine, which ensures the APIs are called in the expected sequence. For more information, please refer to the planned operations and unplanned operations in [our HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md). + +#### 6.1.1. HA set and ENI creation + +The first step to start using HA is to create the HA set to form the DPU pair. 
+ +> The full workflow can be found in the ["Clean launch on both sides" section in SmartSwitch HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md#811-clean-launch-on-both-sides). + +```mermaid +sequenceDiagram + autonumber + + participant SDN as SDN Controller + participant S0D as Switch 0 DPU
(Desired Active) + participant S0N as Switch 0 NPU + participant S1N as Switch 1 NPU + participant S1D as Switch 1 DPU
(Desired Standby) + + SDN->>S0N: Create HA set on all switches + SDN->>S1N: Create HA set on all switches + + S0N->>S0D: Create HA set on DPU
(sai: create_ha_set) + S1N->>S1D: Create HA set on DPU
(sai: create_ha_set) + Note over S0D,S1D: DPU starts data path probe to peer DPU.
After this, ENIs can be created. + + SDN->>S0N: Create ENI with HA set ID as dead state + SDN->>S1N: Create ENI with HA set ID as dead state + + S0N->>S0D: Create ENI with HA set ID as dead role
(sai: create_eni) + S1N->>S1D: Create ENI with HA set ID as dead role
(sai: create_eni) + Note over SDN,S1D: SDN controller programs the SDN policy for both ENI. + + SDN->>S0N: Update ENI with desired state as active. + SDN->>S1N: Update ENI with desired state as empty. + Note over S0N,S1N: hamgrd driving HA state machine + + S1N->>S1D: Update ENI with standby role.
(sai: set_eni_attribute) + Note over S0N,S1N: hamgrd continue to drive
HA state machine + S0N->>S0D: Update ENI with active role.
(sai: set_eni_attribute) + + Note over S0N,S1N: hamgrd continue to drive HA
state machine and update
nexthop on all switches. +``` + +#### 6.1.2. Switchover + +After the DPU goes into steady state, we can start initiating the switchover process. + +In the ENI-level HA, the switchover starts from the standby side, so we can make sure the standby is ready to pick up the traffic before switching over. + +> The full workflow can be found in the ["Planned switchover" section in SmartSwitch HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md#82-planned-switchover). + +```mermaid +sequenceDiagram + autonumber + + participant SDN as SDN Controller + participant S0D as Switch 0 DPU
(Active->Standby) + participant S0N as Switch 0 NPU + participant S1N as Switch 1 NPU + participant S1D as Switch 1 DPU
(Standby->Active) + + SDN->>S0N: Update ENI with desired state as empty. + SDN->>S1N: Update ENI with desired state as active. + Note over S0N,S1N: hamgrd gets approval from
upstream service and drives
HA state machine + + S1N->>S1D: Update ENI with SwitchingToActive role.
(sai: set_eni_attribute) + Note over S0N,S1N: hamgrd continue to drive
HA state machine + S0N->>S0D: Update ENI with standby role.
(sai: set_eni_attribute) + Note over S0N,S1N: hamgrd continue to drive
HA state machine + S1N->>S1D: Update ENI with active role.
(sai: set_eni_attribute) + + Note over S0N,S1N: hamgrd continue to drive
HA state machine and
update nexthop on all switches. +``` + +#### 6.1.3. Unplanned failover + +Whenever the network or one DPU has a problem, unplanned failover will be triggered. + +In ENI-level HA, the unplanned events are monitored by the HA control plane via various event sources, such as PMON, counters, etc. Once any event is triggered, we will trigger the unplanned failover as below: + +> The full workflow can be found in the ["Working as standalone setup" section in SmartSwitch HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md#101-working-as-standalone-setup). + +```mermaid +sequenceDiagram + autonumber + + participant S0D as Switch 0 DPU
(Active->Dead) + participant S0N as Switch 0 NPU + participant S1N as Switch 1 NPU + participant S1D as Switch 1 DPU
(Standby->Standalone) + + S0N->>S0N: PMON detects DPU0 is dead. + Note over S0N,S1N: hamgrd receives the health
signal and drives HA state
machine + + S1N->>S1D: Update ENI with Standalone role.
(sai: set_eni_attribute) + + Note over S0N,S1N: hamgrd continue to drive
HA state machine and
update nexthop on all switches. +``` + +#### 6.1.4. Recover from unplanned failover + +During recovery, bulk sync will be used to ensure both sides contain the same set of flows. + +In ENI-level HA today, perfect sync will be used as the default mechanism to sync the flows. It is done via the DASH flow APIs as below. For more information, please refer to the DASH flow API design doc. + +> The full workflow can be found in the ["Launch with standalone peer" section in SmartSwitch HA HLD](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md#812-launch-with-standalone-peer). + +```mermaid +sequenceDiagram + autonumber + + participant SDN as SDN Controller + participant S0D as Switch 0 DPU
(Standalone->Active) + participant S0N as Switch 0 NPU + participant S1N as Switch 1 NPU + participant S1D as Switch 1 DPU
(Empty->Standby) + + S1D->>S1D: Coming back from dead. + S1N->>S1D: Create HA set on DPU
(sai: create_ha_set) + Note over S0D,S1D: DPU starts data path probe to peer DPU. + + SDN->>S1N: Reconcile SDN policy for the new DPU. + S1N->>S1D: Create ENI with HA set ID as dead role
(sai: create_eni) + Note over S0N,S1N: hamgrd driving HA state machine. + + S1N->>S1D: Update ENI with standby role.
(sai: set_eni_attribute) + Note over S0N,S1N: hamgrd continue to drive
HA state machine + S0N->>S0D: Update ENI with active role.
(sai: set_eni_attribute) + + Note over S0N,S1N: hamgrd continue to drive HA
state machine and update
nexthop on all switches. +``` diff --git a/documentation/high-avail/images/ha-bm-packet-flow.svg b/documentation/high-avail/images/ha-bm-packet-flow.svg new file mode 100644 index 000000000..1d6165402 --- /dev/null +++ b/documentation/high-avail/images/ha-bm-packet-flow.svg @@ -0,0 +1,4 @@ + + + +
Active
Data Plane App
P4 Pipeline
Parser
ENI Pipeline
ConnTrack
Lookup
Match
Stages
Action
Apply
Trap
Forward
HA
Flow Sync Req
Direction/ENI Lookup
In
Recirculate Initial Packet
Call Flow API to create flow
in not-sync'ed state or update
flow to sync'ed state
Recirculate Flow Sync Ack Packet
Legends
Packet circulated
by data plane app
Initial Packet
SAI API Call
Flow Sync Req
Flow Sync Ack
Final Packet
Standby
Data Plane App
P4 Pipeline
Parser
ENI Pipeline
ConnTrack
Lookup
Flow Sync Ack
Match
Stages
Action
Apply
Trap
Forward
HA
Direction/ENI Lookup
Recirculate Flow Sync Req Packet
Call Flow API to
create flow in
sync'ed state
Out
\ No newline at end of file diff --git a/documentation/high-avail/images/ha-components-complex.svg b/documentation/high-avail/images/ha-components-complex.svg new file mode 100644 index 000000000..fc050af3e --- /dev/null +++ b/documentation/high-avail/images/ha-components-complex.svg @@ -0,0 +1,4 @@ + + + +
DPU 2
DPU 1
HA Set 1
HA Scope 1
ENI 3
3-tuple Flow Table
5-tuple
Flow Table
ENI 3
3-tuple Flow Table
5-tuple
Flow Table
HA Scope 2
ENI 4
3-tuple Flow Table
5-tuple
Flow Table
ENI 4
3-tuple Flow Table
5-tuple
Flow Table
DPU 0
HA Set 0
HA Scope 0
ENI 0
3-tuple Flow Table
5-tuple
Flow Table
ENI 0
3-tuple Flow Table
5-tuple
Flow Table
ENI 1
3-tuple Flow Table
5-tuple
Flow Table
ENI 1
3-tuple Flow Table
5-tuple
Flow Table
\ No newline at end of file diff --git a/documentation/high-avail/images/ha-components-dpu-level-ha.svg b/documentation/high-avail/images/ha-components-dpu-level-ha.svg new file mode 100644 index 000000000..9f8748ba4 --- /dev/null +++ b/documentation/high-avail/images/ha-components-dpu-level-ha.svg @@ -0,0 +1,4 @@ + + + +
DPU 1
DPU 0
HA Set 0
HA Scope 0
ENI 0
ENI 1
Flow Table
ENI 0
ENI 1
Flow Table
\ No newline at end of file diff --git a/documentation/high-avail/images/ha-components-eni-level-ha.svg b/documentation/high-avail/images/ha-components-eni-level-ha.svg new file mode 100644 index 000000000..db0147eb1 --- /dev/null +++ b/documentation/high-avail/images/ha-components-eni-level-ha.svg @@ -0,0 +1,4 @@ + + + +
DPU 1
DPU 0
HA Set 0
HA Scope 0
ENI 0
HA Scope 1
ENI 1
Flow Table
HA Scope 0
ENI 0
HA Scope 1
ENI 1
Flow Table
\ No newline at end of file From d0fed0cc15282aac199188effe4f7f09cfdaeb97 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 7 Mar 2024 07:41:53 +0000 Subject: [PATCH 02/22] fix spellcheck --- .wordlist.txt | 6 +++ documentation/high-avail/ha-api-hld.md | 60 +++++++++++++------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index 1b0b6257f..463b1a777 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -6,6 +6,7 @@ Accton ACK Ack ack +ack'ed acl ACL ACLs @@ -304,6 +305,7 @@ IxLoad ixload IxNetwork IxNetworkWeb +Jiang Jinja jitter journaled @@ -372,6 +374,7 @@ NonSynStateful NorthBound Novus NPL +NPU NPUS NSG NSGs @@ -471,11 +474,13 @@ README READMEs README's reconvergence +RECV RedirectRuleResimulatedUf redis renderer repo repos +REQ resimulated resimulation responder @@ -534,6 +539,7 @@ SmartAppliances SmartNIC SmartNic SmartNICs +SmartSwitch SmartSwitches snappi SNAT diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 57b804148..fd3a3c697 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -1,4 +1,4 @@ -# DASH High Availablility API +# DASH High Availability API | Rev | Date | Author | Change Description | | --- | ---- | ------ | ------------------ | @@ -53,7 +53,7 @@ For how the network topology is setup and how flow HA works, such as lifetime ma To support the [SmartSwitch HA workflows](https://github.com/sonic-net/SONiC/blob/master/doc/smart-switch/high-availability/smart-switch-ha-hld.md), from SAI perspective, there are a few key components involved: - **HA set**: It defines the peer DPU and data plane channel that used for inline flow sync. -- **HA scope**: It controls the failover scope, such as HA role, such as active, standby or standalone, and expected flow version for new flows. Depends on the HA role of the ENI, the packet will be processed differently to get the flow sync'ed. 
+- **HA scope**: It controls the failover scope, such as HA role, such as active, standby or standalone, and expected flow version for new flows. Depends on the HA role of the ENI, the packet will be processed differently to get the flow synched. - **Flow table**: It is the container of all flow entries. It can be attached to all ENIs in a DPU or being attached to a single DPU, depends on at which level we like to provide the flow HA, i.e. HA scope. - **Flow**: It is used to represent a network connection, which contains match conditions and packet transformations. In HA, each flow will have it own HA-related states, such flow version, flow sync state and etc. - **ENI**: In ENI-level HA, each ENI will be connected to a HA scope. @@ -82,13 +82,13 @@ HA set is defined as a SAI object and contains the following SAI attributes: | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_HA_SET_ATTR_LOCAL_IP | sai_ip_address_t | The IP address of the local DPU. | -| SAI_HA_SET_ATTR_PEER_IP | sai_ip_address_t | The IP address of the peer DPU. | -| SAI_HA_SET_ATTR_DP_CHANNEL_DST_PORT | sai_uint16_t | The destination port of the data plane channel. | -| SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MIN | sai_uint16_t | The minimum source port of the data plane channel. | -| SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MAX | sai_uint16_t | The maximum source port of the data plane channel. | -| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_INTERVAL_MS | sai_uint32_t | The interval of the data plane channel probe. | -| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_FAIL_THRESHOLD | sai_uint32_t | The threshold of the data plane channel probe fail. | +| SAI_HA_SET_ATTR_LOCAL_IP | `sai_ip_address_t` | The IP address of the local DPU. | +| SAI_HA_SET_ATTR_PEER_IP | `sai_ip_address_t` | The IP address of the peer DPU. | +| SAI_HA_SET_ATTR_DP_CHANNEL_DST_PORT | `sai_uint16_t` | The destination port of the data plane channel. 
| +| SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MIN | `sai_uint16_t` | The minimum source port of the data plane channel. | +| SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MAX | `sai_uint16_t` | The maximum source port of the data plane channel. | +| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_INTERVAL_MS | `sai_uint32_t` | The interval of the data plane channel probe. | +| SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_FAIL_THRESHOLD | `sai_uint32_t` | The threshold of the data plane channel probe fail. | ### 4.2. HA Scope @@ -96,9 +96,9 @@ HA scope is also defined as a SAI object and contains the following SAI attribut | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_HA_SCOPE_ATTR_HA_SET_ID | sai_object_id_t | The HA set ID for this scope. | -| SAI_HA_SCOPE_ATTR_HA_ROLE | sai_dash_ha_role_t | The HA role. | -| SAI_HA_SCOPE_ATTR_FLOW_VERSION | sai_uint32_t | The flow version for new flows. | +| SAI_HA_SCOPE_ATTR_HA_SET_ID | `sai_object_id_t` | The HA set ID for this scope. | +| SAI_HA_SCOPE_ATTR_HA_ROLE | `sai_dash_ha_role_`t` | The HA role. | +| SAI_HA_SCOPE_ATTR_FLOW_VERSION | `sai_uint32_t` | The flow version for new flows. | ### 4.3. Flow table @@ -112,8 +112,8 @@ To support HA, each flow contains the following SAI attributes: | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_FLOW_ATTR_FLOW_VERSION | sai_uint32_t | The flow version. | -| SAI_FLOW_ATTR_FLOW_SYNC_STATE | sai_dash_ha_flow_sync_state_t | The flow sync state. | +| SAI_FLOW_ATTR_FLOW_VERSION | `sai_uint32_t` | The flow version. | +| SAI_FLOW_ATTR_FLOW_SYNC_STATE | `sai_dash_ha_flow_sync_state_t` | The flow sync state. | The flow sync state is defined as below: @@ -127,7 +127,7 @@ typedef enum _sai_dash_ha_flow_sync_state_t } sai_dash_ha_flow_sync_state_t; ``` -The flow sync state is a small state machine that represents if the flow is sync'ed or not, so we can make the packet processing decision accordingly to achieve HA. 
+The flow sync state is a small state machine that represents if the flow is synched or not, so we can make the packet processing decision accordingly to achieve HA. ```mermaid stateDiagram-v2 @@ -157,7 +157,7 @@ To provide the ENI-level HA control, each ENI will have the following SAI attrib | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_ENI_ATTR_HA_SCOPE_ID | sai_object_id_t | The HA scope ID of the ENI. | +| SAI_ENI_ATTR_HA_SCOPE_ID | `sai_object_id_t` | The HA scope ID of the ENI. | The HA role is defined as below: @@ -178,7 +178,7 @@ To receive the HA state updates from the DASH implementation, the following SAI | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_SWITCH_ATTR_HA_SCOPE_EVENT_NOTIFY | sai_ha_scope_event_notification_fn | The callback function for receiving events on the HA scope. | +| SAI_SWITCH_ATTR_HA_SCOPE_EVENT_NOTIFY | `sai_ha_scope_event_notification_fn` | The callback function for receiving events on the HA scope. | And the callback function and HA state changed event is defined as below: @@ -257,7 +257,7 @@ To monitor the traffic on ENI level, the following stats are added: | SAI stats name | Description | | -------------- | ----------- | -| SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)RX_BYTES | Total bytes recevied on ENI (overall/outbound/inbound) pipeline. | +| SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)RX_BYTES | Total bytes received on ENI (overall/outbound/inbound) pipeline. | | SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)RX_PACKETS | Total number of packets received on ENI (overall/outbound/inbound) pipeline. | | SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)TX_BYTES | Total bytes sent by ENI (overall/outbound/inbound) pipeline. | | SAI_ENI_STAT_(/OUTBOUND_/INBOUND_)TX_PACKETS | Total number of packets sent by ENI (overall/outbound/inbound) pipeline. 
| @@ -282,14 +282,14 @@ Here are the new stats added for monitoring flow operations on each ENI: Here are the new stats added for monitoring flow sync packets on each ENI: -- The flow can be sync'ed inline with the packet, or on a timer such as idle timeout. +- The flow can be synched inline with the packet, or on a timer such as idle timeout. | SAI stats name | Description | | -------------- | ----------- | | SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_RX_BYTES | The bytes of inline/timed flow sync packet received by the ENI. | | SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_RX_PACKETS | The number of inline/timed flow sync packets received by the ENI. | -| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_TX_BYTES | The bytes of inline/timed flow sync packet that this ENI sents. | -| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_TX_PACKETS | The number of inline/timed flow sync packets that this ENI sents. | +| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_TX_BYTES | The bytes of inline/timed flow sync packet that this ENI sent. | +| SAI_ENI_STAT_(INLINE/TIMED)_FLOW_SYNC_PACKET_TX_PACKETS | The number of inline/timed flow sync packets that this ENI sent. | ##### 4.7.2.4. ENI-level flow sync operations counters @@ -320,12 +320,12 @@ To illustrate how HA works in DASH, we implements HA in our behavior model. Howe ### 5.1. HA stage -In the DASH pipeline, the HA can be considered as a substage of the conntrack lookup or a stage that follows the conntrack lookup. +In the DASH pipeline, the HA can be considered as a sub-stage of the Conntrack Lookup stage or a stage that follows the Conntrack Lookup stage. When a packet arrives, it will: 1. From ENI, pick up the flow table id and HA scope id. -2. From flow table, pick up the current flow info, such as if flow exists and sync'ed. +2. From flow table, pick up the current flow info, such as if flow exists and synched. 3. From HA scope id, pick up the HA set id, HA role and expected flow version if it will create a new flow. 4. 
From HA set, pick up the peer DPU information. @@ -367,14 +367,14 @@ To simplify the scenario, let's say a packet arrives at the active DPU in active ![](./images/ha-bm-packet-flow.svg) 1. (Green Lines) First, the packet for a new flow arrives to the active DPU. After ENI lookup, it will try to find the flow in the flow table. Due to flow being missing, the packet will go through the rest of the pipeline and eventually trapped into the data plane app. -2. (Black Lines in Active DPU) The data plane app will get the lookup result from the packet and insert the flow entry in not sync'ed state to flow table using the DASH SAI flow APIs. -3. (Red Line in Active DPU) The data plane app recirculate the packet back to the pipeline and hit the flow table again, because the flow is not sync'ed and this ENI is active side, HA stage will kick in and change this packet to a flow sync request packet, then forward it to its peer DPU. +2. (Black Lines in Active DPU) The data plane app will get the lookup result from the packet and insert the flow entry in not synched state to flow table using the DASH SAI flow APIs. +3. (Red Line in Active DPU) The data plane app recirculate the packet back to the pipeline and hit the flow table again, because the flow is not synched and this ENI is active side, HA stage will kick in and change this packet to a flow sync request packet, then forward it to its peer DPU. 4. (Yellow Lines) The flow sync packet goes to the standby side, which will also hit the flow miss, skip the HA stage and trapped into data plane app. -5. (Black Line in Standby DPU) The data plane app takes the flow decision from the flow sync packet, inserts the flow entry in sync'ed state. -6. (Red Lines in Standby DPU) The data plane app recirculate the packet back to the pipeline and hit the flow table again, because the packet is a flow sync request and flow is in sync'ed state, HA stage will transform this packet into flow sync ack and sending it back to its active side. -7. 
(Blue Lines) The packet goes back to the active side, which will hit the flow again. Since it is flow sync ack and flow is in not-sync'ed state, the packet will be trapped into data plane app again. -8. (Black Line in Active DPU) The data plane app will update the flow sync state into sync'ed state and recirculate the packet again. -9. (Purple Lines) The final packet will be sent to the pipeline again, hit the flow entry in sync'ed state, applying all the transformation and send out to the network. +5. (Black Line in Standby DPU) The data plane app takes the flow decision from the flow sync packet, inserts the flow entry in synched state. +6. (Red Lines in Standby DPU) The data plane app recirculate the packet back to the pipeline and hit the flow table again, because the packet is a flow sync request and flow is in synched state, HA stage will transform this packet into flow sync ack and sending it back to its active side. +7. (Blue Lines) The packet goes back to the active side, which will hit the flow again. Since it is flow sync ack and flow is in not-synched state, the packet will be trapped into data plane app again. +8. (Black Line in Active DPU) The data plane app will update the flow sync state into synched state and recirculate the packet again. +9. (Purple Lines) The final packet will be sent to the pipeline again, hit the flow entry in synched state, applying all the transformation and send out to the network. ## 6. Workflows From 65c58f90f45981c4202e9abcd541dd4ba5062e04 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 7 Mar 2024 20:11:38 +0000 Subject: [PATCH 03/22] fix issues found during presentation. 
--- dash-pipeline/Makefile | 2 +- dash-pipeline/bmv2/dash_metadata.p4 | 2 +- dash-pipeline/bmv2/dash_pipeline.p4 | 2 +- dash-pipeline/bmv2/stages/ha.p4 | 2 +- documentation/high-avail/ha-api-hld.md | 37 +++++++++++++------------- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/dash-pipeline/Makefile b/dash-pipeline/Makefile index 400b4f67c..43bde771d 100644 --- a/dash-pipeline/Makefile +++ b/dash-pipeline/Makefile @@ -92,7 +92,7 @@ sai-submodule: # P4 Source code compile TARGETS ###################################### -P4_SRC=$(wildcard bmv2/%.p4) +P4_SRC=$(wildcard bmv2/**/*.p4) P4_MAIN=bmv2/dash_pipeline.p4 P4_OUTDIR=bmv2/dash_pipeline.bmv2 P4_ARTIFACTS=$(P4_OUTDIR)/dash_pipeline.json $(P4_OUTDIR)/dash_pipeline_p4rt.txt diff --git a/dash-pipeline/bmv2/dash_metadata.p4 b/dash-pipeline/bmv2/dash_metadata.p4 index 1882f10a4..8ce43f05e 100644 --- a/dash-pipeline/bmv2/dash_metadata.p4 +++ b/dash-pipeline/bmv2/dash_metadata.p4 @@ -113,7 +113,7 @@ enum bit<8> dash_ha_flow_sync_op_t { struct ha_data_t { // - // ENI HA settings + // HA scope settings // bit<16> ha_scope_id; bit<16> ha_set_id; diff --git a/dash-pipeline/bmv2/dash_pipeline.p4 b/dash-pipeline/bmv2/dash_pipeline.p4 index 023bcc3d7..69cfe6157 100644 --- a/dash-pipeline/bmv2/dash_pipeline.p4 +++ b/dash-pipeline/bmv2/dash_pipeline.p4 @@ -103,7 +103,7 @@ control dash_ingress( bit<32> pps, bit<32> flows, bit<1> admin_state, - @SalVal[type="sai_object_id_t"] bit<16> ha_scope_id, + @SaiVal[type="sai_object_id_t"] bit<16> ha_scope_id, @SaiVal[type="sai_ip_address_t"] IPv4Address vm_underlay_dip, @SaiVal[type="sai_uint32_t"] bit<24> vm_vni, @SaiVal[type="sai_object_id_t"] bit<16> vnet_id, diff --git a/dash-pipeline/bmv2/stages/ha.p4 b/dash-pipeline/bmv2/stages/ha.p4 index d254c69aa..87aea5517 100644 --- a/dash-pipeline/bmv2/stages/ha.p4 +++ b/dash-pipeline/bmv2/stages/ha.p4 @@ -67,7 +67,7 @@ control ha_stage(inout headers_t hdr, @SaiTable[api = "dash_ha", order=1, isobject="true"] table ha_scope { key = 
{ - meta.ha.ha_scope_id : exact @SaiVal[type="sai_object_id_t"]; + meta.ha.ha_scope_id : exact; } actions = { set_ha_scope_attr; diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index fd3a3c697..bd19004d4 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -97,9 +97,22 @@ HA scope is also defined as a SAI object and contains the following SAI attribut | Attribute name | Type | Description | | -------------- | ---- | ----------- | | SAI_HA_SCOPE_ATTR_HA_SET_ID | `sai_object_id_t` | The HA set ID for this scope. | -| SAI_HA_SCOPE_ATTR_HA_ROLE | `sai_dash_ha_role_`t` | The HA role. | +| SAI_HA_SCOPE_ATTR_HA_ROLE | `sai_dash_ha_role_t` | The HA role. | | SAI_HA_SCOPE_ATTR_FLOW_VERSION | `sai_uint32_t` | The flow version for new flows. | +The HA role is defined as below: + +```c +typedef enum _sai_dash_ha_role_t +{ + SAI_DASH_HA_ROLE_DEAD, + SAI_DASH_HA_ROLE_ACTIVE, + SAI_DASH_HA_ROLE_STANDBY, + SAI_DASH_HA_ROLE_STANDALONE, + SAI_DASH_HA_ROLE_SWITCHING_TO_ACTIVE, +} sai_dash_ha_role_t; +``` + ### 4.3. Flow table HA uses the DASH flow table to achieve the flow state manipulation. Since the flow table already provides the CRUD operations, we don't need any extra APIs from flow table. @@ -148,7 +161,6 @@ stateDiagram-v2 D --> M: Flow sync ack received ``` - For more information, please refer to DASH flow API documentation. ### 4.5. ENI @@ -159,19 +171,6 @@ To provide the ENI-level HA control, each ENI will have the following SAI attrib | -------------- | ---- | ----------- | | SAI_ENI_ATTR_HA_SCOPE_ID | `sai_object_id_t` | The HA scope ID of the ENI. | -The HA role is defined as below: - -```c -typedef enum _sai_dash_ha_role_t -{ - SAI_DASH_HA_ROLE_DEAD, - SAI_DASH_HA_ROLE_ACTIVE, - SAI_DASH_HA_ROLE_STANDBY, - SAI_DASH_HA_ROLE_STANDALONE, - SAI_DASH_HA_ROLE_SWITCHING_TO_ACTIVE, -} sai_dash_ha_role_t; -``` - ### 4.6. 
Event notifications To receive the HA state updates from the DASH implementation, the following SAI notification attributes are added on the switch object: @@ -326,7 +325,7 @@ When a packet arrives, it will: 1. From ENI, pick up the flow table id and HA scope id. 2. From flow table, pick up the current flow info, such as if flow exists and synched. -3. From HA scope id, pick up the HA set id, HA role and expected flow version if it will create a new flow. +3. From HA scope, pick up the HA set id, HA role and expected flow version if it will create a new flow. 4. From HA set, pick up the peer DPU information. Then, based on these information, the HA stage will make a decision on whether this packet should be forwarded directly, or going through slow path to get flow created or updated, or tunneled to its peer for flow sync or sending to active. @@ -352,9 +351,9 @@ enum bit<8> dash_packet_type_t { // HA flow sync operations enum bit<8> dash_ha_flow_sync_op_t { - FLOW_CREATE = 0, - FLOW_UPDATE = 1, - FLOW_DELETE = 2 + FLOW_CREATE = 0, // New flow creation. + FLOW_UPDATE = 1, // Flow resimulation or any other reason causing existing flow to be updated. + FLOW_DELETE = 2 // Flow deletion. }; ``` From f9e15da1ce94294e027abc65ce0f9b8cfcbb65a7 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 7 Mar 2024 20:21:16 +0000 Subject: [PATCH 04/22] Fix build. 
--- dash-pipeline/bmv2/stages/ha.p4 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dash-pipeline/bmv2/stages/ha.p4 b/dash-pipeline/bmv2/stages/ha.p4 index 87aea5517..15308c51d 100644 --- a/dash-pipeline/bmv2/stages/ha.p4 +++ b/dash-pipeline/bmv2/stages/ha.p4 @@ -57,11 +57,11 @@ control ha_stage(inout headers_t hdr, // action set_ha_scope_attr( @SalVal[type="sai_object_id_t"] bit<16> ha_set_id, - @SaiVal[type="sai_dash_ha_role_t"] dash_ha_role_t ha_role, + @SaiVal[type="sai_dash_ha_role_t"] dash_ha_role_t dash_ha_role, @SaiVal[isreadonly="true"] bit<32> flow_version ) { meta.ha.ha_set_id = ha_set_id; - meta.ha.ha_role = ha_role; + meta.ha.ha_role = dash_ha_role; } @SaiTable[api = "dash_ha", order=1, isobject="true"] @@ -89,8 +89,8 @@ control ha_stage(inout headers_t hdr, bit<1> peer_ip_is_v6, @SaiVal[type="sai_ip_address_t"] IPv4ORv6Address peer_ip, bit<16> dp_channel_dst_port, - bit<16> dp_channel_src_port_min, - bit<16> dp_channel_src_port_max, + bit<16> dp_channel_min_src_port, + bit<16> dp_channel_max_src_port, bit<32> dp_channel_probe_interval_ms, bit<32> dp_channel_probe_fail_threshold ) { @@ -98,8 +98,8 @@ control ha_stage(inout headers_t hdr, meta.ha.peer_ip = peer_ip; meta.ha.dp_channel_dst_port = dp_channel_dst_port; - meta.ha.dp_channel_src_port_min = dp_channel_src_port_min; - meta.ha.dp_channel_src_port_max = dp_channel_src_port_max; + meta.ha.dp_channel_src_port_min = dp_channel_min_src_port; + meta.ha.dp_channel_src_port_max = dp_channel_max_src_port; } @SaiTable[api = "dash_ha", order=0, isobject="true"] From e1d95f84df46544f57fde0158e79131bb0042257 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 7 Mar 2024 21:32:33 +0000 Subject: [PATCH 05/22] Update tests. 
--- dash-pipeline/tests/libsai/vnet_out/vnet_out.cpp | 4 ++++ .../tests/saithrift/pytest/vnet/test_saithrift_vnet.py | 1 + test/test-cases/functional/ptf/sai_dash_utils.py | 1 + test/test-cases/functional/ptf/saidashacl.py | 1 + test/test-cases/functional/ptf/saidashvnet_sanity.py | 1 + .../functional/saic/config_bidir_setup_commands.py | 3 +++ .../functional/saic/config_inbound_setup_commands.py | 4 ++++ .../functional/saic/config_outbound_setup_commands.json | 2 ++ .../functional/saic/sai-api/test_sai_api_vnet_eni.py | 2 ++ .../functional/saic/sai-api/test_sai_api_vnet_in_route.py | 2 ++ .../saic/sai-api/test_sai_api_vnet_out_route.py | 2 ++ ...est_sai_vnet_outbound_small_scale_config_via_dpugen.py | 2 +- ...net_outbound_small_scale_config_via_dpugen_create.json | 4 ++++ .../test-cases/scale/saic/test_sai_vnet_outbound_scale.py | 2 +- .../scale/saic/vnet_inbound_setup_commands.json | 2 ++ .../scale/saic/vnet_outbound_setup_commands_scale.json | 2 ++ .../scale/saic/vnet_outbound_setup_commands_simple.json | 1 + .../saic/vnet_route_setup_commands_bidirectional.json | 8 ++++++++ .../saic/vnet_route_setup_commands_unidirectional.json | 2 ++ 19 files changed, 44 insertions(+), 2 deletions(-) diff --git a/dash-pipeline/tests/libsai/vnet_out/vnet_out.cpp b/dash-pipeline/tests/libsai/vnet_out/vnet_out.cpp index d386c1db5..c032c32e5 100644 --- a/dash-pipeline/tests/libsai/vnet_out/vnet_out.cpp +++ b/dash-pipeline/tests/libsai/vnet_out/vnet_out.cpp @@ -126,6 +126,10 @@ int main(int argc, char **argv) attr.value.booldata = true; attrs.push_back(attr); + attr.id = SAI_ENI_ATTR_HA_SCOPE_ID; + attr.value.oid = SAI_NULL_OBJECT_ID; + attrs.push_back(attr); + attr.id = SAI_ENI_ATTR_VM_UNDERLAY_DIP; sai_ip_addr_t u_dip_addr = {.ip4 = 0x010310ac}; sai_ip_address_t u_dip = {.addr_family = SAI_IP_ADDR_FAMILY_IPV4, diff --git a/dash-pipeline/tests/saithrift/pytest/vnet/test_saithrift_vnet.py b/dash-pipeline/tests/saithrift/pytest/vnet/test_saithrift_vnet.py index ffdc26568..7a8552aaf 
100644 --- a/dash-pipeline/tests/saithrift/pytest/vnet/test_saithrift_vnet.py +++ b/dash-pipeline/tests/saithrift/pytest/vnet/test_saithrift_vnet.py @@ -44,6 +44,7 @@ def test_sai_thrift_create_eni(saithrift_client): eni = sai_thrift_create_eni(saithrift_client, cps=10000, pps=100000, flows=100000, admin_state=True, + ha_scope_id=0, vm_underlay_dip=vm_underlay_dip, vm_vni=9, vnet_id=vnet, diff --git a/test/test-cases/functional/ptf/sai_dash_utils.py b/test/test-cases/functional/ptf/sai_dash_utils.py index cc34ce22d..ea767e1f0 100644 --- a/test/test-cases/functional/ptf/sai_dash_utils.py +++ b/test/test-cases/functional/ptf/sai_dash_utils.py @@ -140,6 +140,7 @@ def eni_create(self, **kwargs): "pps": 100000, "flows": 100000, "admin_state": True, + "ha_scope_id": 0, "vm_underlay_dip": sai_ipaddress("0.0.0.0"), "vm_vni": 1, "vnet_id": 1, diff --git a/test/test-cases/functional/ptf/saidashacl.py b/test/test-cases/functional/ptf/saidashacl.py index 5f295d1c2..8fea9bc9f 100644 --- a/test/test-cases/functional/ptf/saidashacl.py +++ b/test/test-cases/functional/ptf/saidashacl.py @@ -194,6 +194,7 @@ def setUpSwitch(self): self.eni = self.create_obj(sai_thrift_create_eni, sai_thrift_remove_eni, cps=10000, pps=100000, flows=100000, admin_state=True, + ha_scope_id=0, vm_underlay_dip=vm_underlay_dip, vm_vni=9, vnet_id=self.vnet, diff --git a/test/test-cases/functional/ptf/saidashvnet_sanity.py b/test/test-cases/functional/ptf/saidashvnet_sanity.py index 6deeb4f9b..eb9879c64 100644 --- a/test/test-cases/functional/ptf/saidashvnet_sanity.py +++ b/test/test-cases/functional/ptf/saidashvnet_sanity.py @@ -65,6 +65,7 @@ def configureVnet(self): self.eni = sai_thrift_create_eni(self.client, cps=10000, pps=100000, flows=100000, admin_state=True, + ha_scope_id=0, vm_underlay_dip=vm_underlay_dip, vm_vni=9, vnet_id=self.vnet, diff --git a/test/test-cases/functional/saic/config_bidir_setup_commands.py b/test/test-cases/functional/saic/config_bidir_setup_commands.py index 
cd558797f..40667c746 100644 --- a/test/test-cases/functional/saic/config_bidir_setup_commands.py +++ b/test/test-cases/functional/saic/config_bidir_setup_commands.py @@ -95,6 +95,7 @@ "SAI_ENI_ATTR_PPS", "100000", "SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", ENI_VTEP_IP, "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet", @@ -137,6 +138,7 @@ "SAI_ENI_ATTR_PPS", "100000", "SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", NETWORK_VTEP_IP, "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet", @@ -179,6 +181,7 @@ "SAI_ENI_ATTR_PPS", "100000", "SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", NETWORK_VTEP_IP, "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet", diff --git a/test/test-cases/functional/saic/config_inbound_setup_commands.py b/test/test-cases/functional/saic/config_inbound_setup_commands.py index 8ba05f2bc..ae457da4e 100644 --- a/test/test-cases/functional/saic/config_inbound_setup_commands.py +++ b/test/test-cases/functional/saic/config_inbound_setup_commands.py @@ -103,6 +103,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", ENI_VTEP_IP, "SAI_ENI_ATTR_VM_VNI", @@ -180,6 +182,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", NETWORK_VTEP_IP, "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/functional/saic/config_outbound_setup_commands.json b/test/test-cases/functional/saic/config_outbound_setup_commands.json index 4c6b7f0c0..2ee4275fb 100644 --- a/test/test-cases/functional/saic/config_outbound_setup_commands.json +++ b/test/test-cases/functional/saic/config_outbound_setup_commands.json @@ -69,6 +69,7 @@ "SAI_ENI_ATTR_PPS", "100000", 
"SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "221.0.1.11", "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet", @@ -111,6 +112,7 @@ "SAI_ENI_ATTR_PPS", "100000", "SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "221.0.2.101", "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet", diff --git a/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_eni.py b/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_eni.py index 30798c09e..e733f05a4 100644 --- a/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_eni.py +++ b/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_eni.py @@ -38,6 +38,8 @@ def test_vnet_eni_create(self, dpu): "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "10.10.1.10", "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_in_route.py b/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_in_route.py index 73bc878fa..dba18594e 100644 --- a/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_in_route.py +++ b/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_in_route.py @@ -45,6 +45,8 @@ def test_vnet_inbound_routing_entry_create_setup(self, dpu): "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "10.10.2.10", "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_out_route.py b/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_out_route.py index 2c07e1d8d..ecf73212e 100644 --- a/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_out_route.py +++ b/test/test-cases/functional/saic/sai-api/test_sai_api_vnet_out_route.py @@ -44,6 +44,8 @@ def test_vnet_outbound_routing_entry_create(self, dpu): "100000", 
"SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "10.10.9.10", "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen.py b/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen.py index a0c33ade6..5376fa3d1 100755 --- a/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen.py +++ b/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen.py @@ -63,7 +63,7 @@ def make_create_commands(self): 'SAI_ENI_ATTR_PL_SIP', '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'SAI_ENI_ATTR_PL_SIP_MASK', '2001:0db8:85a3:0000:0000:0000:0000:0000', 'SAI_ENI_ATTR_PL_UNDERLAY_SIP', '10.0.0.18', "SAI_ENI_ATTR_DASH_TUNNEL_DSCP_MODE", "SAI_DASH_TUNNEL_DSCP_MODE_PRESERVE_MODEL", "SAI_ENI_ATTR_DSCP", "0", - "SAI_ENI_ATTR_DISABLE_FAST_PATH_ICMP_FLOW_REDIRECTION", "False"]) + "SAI_ENI_ATTR_DISABLE_FAST_PATH_ICMP_FLOW_REDIRECTION", "False", "SAI_ENI_ATTR_HA_SCOPE_ID", "0"]) ret = add_extra_attrs('SAI_OBJECT_TYPE_OUTBOUND_CA_TO_PA_ENTRY', ret, [ 'SAI_OUTBOUND_CA_TO_PA_ENTRY_ATTR_METER_CLASS', '0', 'SAI_OUTBOUND_CA_TO_PA_ENTRY_ATTR_METER_CLASS_OVERRIDE', 'True' ]) diff --git a/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen_create.json b/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen_create.json index 93529a361..225113553 100644 --- a/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen_create.json +++ b/test/test-cases/functional/saic/tutorial/test_sai_vnet_outbound_small_scale_config_via_dpugen_create.json @@ -87,6 +87,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "221.0.1.1", "SAI_ENI_ATTR_VM_VNI", @@ -164,6 +166,8 @@ "100000", 
"SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "221.0.1.2", "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/scale/saic/test_sai_vnet_outbound_scale.py b/test/test-cases/scale/saic/test_sai_vnet_outbound_scale.py index cbabf50d2..e14bf8aaa 100755 --- a/test/test-cases/scale/saic/test_sai_vnet_outbound_scale.py +++ b/test/test-cases/scale/saic/test_sai_vnet_outbound_scale.py @@ -116,7 +116,7 @@ def make_create_vnet_config(self): 'SAI_ENI_ATTR_PL_SIP', '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'SAI_ENI_ATTR_PL_SIP_MASK', '2001:0db8:85a3:0000:0000:0000:0000:0000', 'SAI_ENI_ATTR_PL_UNDERLAY_SIP', '10.0.0.18', "SAI_ENI_ATTR_DASH_TUNNEL_DSCP_MODE", "SAI_DASH_TUNNEL_DSCP_MODE_PRESERVE_MODEL", "SAI_ENI_ATTR_DSCP", "0", - "SAI_ENI_ATTR_DISABLE_FAST_PATH_ICMP_FLOW_REDIRECTION", "False"]) + "SAI_ENI_ATTR_DISABLE_FAST_PATH_ICMP_FLOW_REDIRECTION", "False", "SAI_ENI_ATTR_HA_SCOPE_ID", "0"]) ret = add_extra_attrs('SAI_OBJECT_TYPE_OUTBOUND_CA_TO_PA_ENTRY', ret, [ 'SAI_OUTBOUND_CA_TO_PA_ENTRY_ATTR_METER_CLASS', '0', 'SAI_OUTBOUND_CA_TO_PA_ENTRY_ATTR_METER_CLASS_OVERRIDE', 'True' ]) diff --git a/test/test-cases/scale/saic/vnet_inbound_setup_commands.json b/test/test-cases/scale/saic/vnet_inbound_setup_commands.json index 98659ddba..8aac07855 100644 --- a/test/test-cases/scale/saic/vnet_inbound_setup_commands.json +++ b/test/test-cases/scale/saic/vnet_inbound_setup_commands.json @@ -65,6 +65,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "10.10.2.10", "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/scale/saic/vnet_outbound_setup_commands_scale.json b/test/test-cases/scale/saic/vnet_outbound_setup_commands_scale.json index 967061a6e..8013b9ca7 100644 --- a/test/test-cases/scale/saic/vnet_outbound_setup_commands_scale.json +++ b/test/test-cases/scale/saic/vnet_outbound_setup_commands_scale.json @@ -104,6 +104,7 @@ "SAI_ENI_ATTR_PPS", "100000",
"SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "172.16.1.1", "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet_#4", @@ -148,6 +149,7 @@ "SAI_ENI_ATTR_PPS", "100000", "SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "172.16.2.1", "SAI_ENI_ATTR_VM_VNI", "10", "SAI_ENI_ATTR_VNET_ID", "$vnet_#5", diff --git a/test/test-cases/scale/saic/vnet_outbound_setup_commands_simple.json b/test/test-cases/scale/saic/vnet_outbound_setup_commands_simple.json index d21474e5c..96b942a0e 100644 --- a/test/test-cases/scale/saic/vnet_outbound_setup_commands_simple.json +++ b/test/test-cases/scale/saic/vnet_outbound_setup_commands_simple.json @@ -56,6 +56,7 @@ "SAI_ENI_ATTR_PPS", "100000", "SAI_ENI_ATTR_FLOWS", "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "172.16.1.1", "SAI_ENI_ATTR_VM_VNI", "9", "SAI_ENI_ATTR_VNET_ID", "$vnet", diff --git a/test/test-cases/scale/saic/vnet_route_setup_commands_bidirectional.json b/test/test-cases/scale/saic/vnet_route_setup_commands_bidirectional.json index 4d951d3b4..a7be524e9 100644 --- a/test/test-cases/scale/saic/vnet_route_setup_commands_bidirectional.json +++ b/test/test-cases/scale/saic/vnet_route_setup_commands_bidirectional.json @@ -126,6 +126,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "172.16.1.1", "SAI_ENI_ATTR_VM_VNI", @@ -203,6 +205,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "10.10.2.10", "SAI_ENI_ATTR_VM_VNI", @@ -280,6 +284,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "10.11.1.10", "SAI_ENI_ATTR_VM_VNI", @@ -357,6 +363,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", 
+ "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "170.16.1.1", "SAI_ENI_ATTR_VM_VNI", diff --git a/test/test-cases/scale/saic/vnet_route_setup_commands_unidirectional.json b/test/test-cases/scale/saic/vnet_route_setup_commands_unidirectional.json index fd8a093bb..1d76e1551 100644 --- a/test/test-cases/scale/saic/vnet_route_setup_commands_unidirectional.json +++ b/test/test-cases/scale/saic/vnet_route_setup_commands_unidirectional.json @@ -65,6 +65,8 @@ "100000", "SAI_ENI_ATTR_ADMIN_STATE", "True", + "SAI_ENI_ATTR_HA_SCOPE_ID", + "0", "SAI_ENI_ATTR_VM_UNDERLAY_DIP", "172.16.1.1", "SAI_ENI_ATTR_VM_VNI", From c549942cf8f4df043210f06d228d725191449991 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 7 Mar 2024 22:20:10 +0000 Subject: [PATCH 06/22] Update HA doc. --- documentation/high-avail/ha-api-hld.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index bd19004d4..7d3934ea1 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -411,17 +411,17 @@ sequenceDiagram SDN->>S0N: Create ENI with HA set ID as dead state SDN->>S1N: Create ENI with HA set ID as dead state - S0N->>S0D: Create ENI with HA set ID as dead role
(sai: create_eni) - S1N->>S1D: Create ENI with HA set ID as dead role
(sai: create_eni) + S0N->>S0D: Create ENI with HA set ID as dead role
(sai: create_ha_scope / create_eni) + S1N->>S1D: Create ENI with HA set ID as dead role
(sai: create_ha_scope / create_eni) Note over SDN,S1D: SDN controller programs the SDN policy for both ENI. SDN->>S0N: Update ENI with desired state as active. SDN->>S1N: Update ENI with desired state as empty. Note over S0N,S1N: hamgrd driving HA state machine - S1N->>S1D: Update ENI with standby role.
(sai: set_eni_attribute) + S1N->>S1D: Update ENI HA scope with standby role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive
HA state machine - S0N->>S0D: Update ENI with active role.
(sai: set_eni_attribute) + S0N->>S0D: Update ENI HA scope with active role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive HA
state machine and update
nexthop on all switches. ``` @@ -448,11 +448,11 @@ sequenceDiagram SDN->>S1N: Update ENI with desired state as active. Note over S0N,S1N: hamgrd gets approval from
upstream service and drives
HA state machine - S1N->>S1D: Update ENI with SwitchingToActive role.
(sai: set_eni_attribute) + S1N->>S1D: Update ENI HA scope with SwitchingToActive role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive
HA state machine - S0N->>S0D: Update ENI with standby role.
(sai: set_eni_attribute) + S0N->>S0D: Update ENI HA scope with standby role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive
HA state machine - S1N->>S1D: Update ENI with active role.
(sai: set_eni_attribute) + S1N->>S1D: Update ENI HA scope with active role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive
HA state machine and
update nexthop on all switches. ``` @@ -477,7 +477,7 @@ sequenceDiagram S0N->>S0N: PMON detects DPU0 is dead. Note over S0N,S1N: hamgrd receives the health
signal and drives HA state
machine - S1N->>S1D: Update ENI with Standalone role.
(sai: set_eni_attribute) + S1N->>S1D: Update ENI HA scope with Standalone role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive
HA state machine and
update nexthop on all switches. ``` @@ -505,12 +505,12 @@ sequenceDiagram Note over S0D,S1D: DPU starts data path probe to peer DPU. SDN->>S1N: Reconcile SDN policy for the new DPU. - S1N->>S1D: Create ENI with HA set ID as dead role
(sai: create_eni) + S1N->>S1D: Create ENI with HA set ID as dead role
(sai: create_ha_scope / create_eni) Note over S0N,S1N: hamgrd driving HA state machine. - S1N->>S1D: Update ENI with standby role.
(sai: set_eni_attribute) + S1N->>S1D: Update ENI HA scope with standby role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive
HA state machine - S0N->>S0D: Update ENI with active role.
(sai: set_eni_attribute) + S0N->>S0D: Update ENI HA scope with active role.
(sai: set_ha_scope_attribute) Note over S0N,S1N: hamgrd continue to drive HA
state machine and update
nexthop on all switches. ``` From 781ff2420b0f731139e527bbbd25c4d1c98b2ea8 Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 11 Mar 2024 18:51:46 +0000 Subject: [PATCH 07/22] Add comments. --- dash-pipeline/bmv2/dash_metadata.p4 | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/dash-pipeline/bmv2/dash_metadata.p4 b/dash-pipeline/bmv2/dash_metadata.p4 index 8ce43f05e..9927abaaa 100644 --- a/dash-pipeline/bmv2/dash_metadata.p4 +++ b/dash-pipeline/bmv2/dash_metadata.p4 @@ -20,14 +20,16 @@ enum bit<16> dash_direction_t { }; enum bit<8> dash_packet_source_t { - EXTERNAL = 0, - DPAPP = 1 + EXTERNAL = 0, // Packets from external sources. + DPAPP = 1 // Packets from data plane app. }; enum bit<8> dash_packet_type_t { - REGULAR = 0, - FLOW_SYNC_REQ = 1, - FLOW_SYNC_ACK = 2 + REGULAR = 0, // Regular packets from external sources. + FLOW_SYNC_REQ = 1, // Flow sync request packet. + FLOW_SYNC_ACK = 2, // Flow sync ack packet. + DP_PROBE_REQ = 3, // Data plane probe packet. + DP_PROBE_ACK = 4 // Data plane probe ack packet. }; // Pipeline stages: @@ -89,7 +91,8 @@ struct overlay_rewrite_data_t { IPv6Address sip_mask; IPv6Address dip_mask; } - + +// HA roles enum bit<8> dash_ha_role_t { DEAD = 0, ACTIVE = 1, @@ -98,6 +101,7 @@ enum bit<8> dash_ha_role_t { SWITCHING_TO_ACTIVE = 4 }; +// Flow sync state enum bit<8> dash_ha_flow_sync_state_t { FLOW_MISS = 0, FLOW_CREATED = 1, @@ -105,23 +109,20 @@ enum bit<8> dash_ha_flow_sync_state_t { FLOW_PENDING_DELETE = 3 }; +// HA flow sync operations enum bit<8> dash_ha_flow_sync_op_t { - FLOW_CREATE = 0, - FLOW_UPDATE = 1, - FLOW_DELETE = 2 + FLOW_CREATE = 0, // New flow creation. + FLOW_UPDATE = 1, // Flow resimulation or any other reason causing existing flow to be updated. + FLOW_DELETE = 2 // Flow deletion. 
}; struct ha_data_t { - // // HA scope settings - // bit<16> ha_scope_id; bit<16> ha_set_id; dash_ha_role_t ha_role; - // // HA set settings - // bit<1> local_ip_is_v6; IPv4ORv6Address local_ip; bit<1> peer_ip_is_v6; @@ -130,9 +131,7 @@ struct ha_data_t { bit<16> dp_channel_src_port_min; bit<16> dp_channel_src_port_max; - // // HA packet/flow state - // dash_ha_flow_sync_state_t flow_sync_state; } From 592d2c0e5f936548983e94ebbb6d1c77b89ff977 Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 11 Mar 2024 18:59:31 +0000 Subject: [PATCH 08/22] minor update on the doc. --- documentation/high-avail/ha-api-hld.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 7d3934ea1..f9ee07f40 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -344,9 +344,11 @@ enum bit<8> dash_packet_source_t { // Packet type enum bit<8> dash_packet_type_t { - REGULAR = 0, - FLOW_SYNC_REQ = 1, - FLOW_SYNC_ACK = 2 + REGULAR = 0, // Regular packets from external sources. + FLOW_SYNC_REQ = 1, // Flow sync request packet. + FLOW_SYNC_ACK = 2, // Flow sync ack packet. + DP_PROBE_REQ = 3, // Data plane probe packet. + DP_PROBE_ACK = 4 // Data plane probe ack packet. }; // HA flow sync operations From ee2e49f33d85d4f224a09d7766b3fe21aaacb993 Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 11 Mar 2024 19:00:30 +0000 Subject: [PATCH 09/22] minor update. --- dash-pipeline/bmv2/dash_metadata.p4 | 3 ++- documentation/high-avail/ha-api-hld.md | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dash-pipeline/bmv2/dash_metadata.p4 b/dash-pipeline/bmv2/dash_metadata.p4 index 9927abaaa..68ac53263 100644 --- a/dash-pipeline/bmv2/dash_metadata.p4 +++ b/dash-pipeline/bmv2/dash_metadata.p4 @@ -21,7 +21,8 @@ enum bit<16> dash_direction_t { enum bit<8> dash_packet_source_t { EXTERNAL = 0, // Packets from external sources. 
- DPAPP = 1 // Packets from data plane app. + DPAPP = 1, // Packets from data plane app. + PEER = 2 // Packets from the paired DPU. }; enum bit<8> dash_packet_type_t { diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index f9ee07f40..52b040fe8 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -337,9 +337,9 @@ To support the flow HA, due to the nature of having different type of packets, s ```c // Packet source enum bit<8> dash_packet_source_t { - EXTERNAL = 0, - DPAPP = 1, - PEER = 2, + EXTERNAL = 0, // Packets from external sources. + DPAPP = 1, // Packets from data plane app. + PEER = 2 // Packets from the paired DPU. }; // Packet type From 3cf763cc8e29aaaf9dd6b13d889f497fbe530210 Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 11 Mar 2024 23:09:03 +0000 Subject: [PATCH 10/22] add pending resimulation state. --- dash-pipeline/bmv2/dash_metadata.p4 | 9 +++++---- documentation/high-avail/ha-api-hld.md | 8 ++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/dash-pipeline/bmv2/dash_metadata.p4 b/dash-pipeline/bmv2/dash_metadata.p4 index 68ac53263..bc4e782a3 100644 --- a/dash-pipeline/bmv2/dash_metadata.p4 +++ b/dash-pipeline/bmv2/dash_metadata.p4 @@ -104,10 +104,11 @@ enum bit<8> dash_ha_role_t { // Flow sync state enum bit<8> dash_ha_flow_sync_state_t { - FLOW_MISS = 0, - FLOW_CREATED = 1, - FLOW_SYNCED = 2, - FLOW_PENDING_DELETE = 3 + FLOW_MISS = 0, // Flow not created yet + FLOW_CREATED = 1, // Flow is created but not synched or waiting for ack + FLOW_SYNCED = 2, // Flow has been synched to its peer + FLOW_PENDING_DELETE = 3, // Flow is pending deletion, waiting for ack + FLOW_PENDING_RESIMULATION = 4 // Flow is marked as pending resimulation }; // HA flow sync operations diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 52b040fe8..53f67fc3c 100644 --- a/documentation/high-avail/ha-api-hld.md +++
b/documentation/high-avail/ha-api-hld.md @@ -136,7 +136,8 @@ typedef enum _sai_dash_ha_flow_sync_state_t SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_MISS, SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_CREATED, SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_SYNCED, - SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_PENDING_DELETE + SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_PENDING_DELETE, + SAI_DASH_HA_FLOW_SYNC_STATE_FLOW_PENDING_RESIMULATION } sai_dash_ha_flow_sync_state_t; ``` @@ -148,15 +149,18 @@ stateDiagram-v2 C: FLOW_CREATED S: FLOW_SYNCED D: FLOW_PENDING_DELETE + R: FLOW_PENDING_RESIMULATION M --> C: Flow created M --> S: Flow sync request
received on
standby node C --> S: Flow sync ack received - S --> C: Flow resimulated + S --> R: Flow resimulation requested + R --> C: Flow updated C --> D: Flow deleted S --> D: Flow deleted + R --> D: Flow deleted D --> M: Flow sync ack received ``` From 07de19f715c6c84f2d817c34fdf70e2fe168cf50 Mon Sep 17 00:00:00 2001 From: r12f Date: Fri, 15 Mar 2024 22:18:49 +0000 Subject: [PATCH 11/22] Add HA set notification for data plane channel alive state change. --- dash-pipeline/bmv2/stages/ha.p4 | 3 +- documentation/high-avail/ha-api-hld.md | 67 ++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/dash-pipeline/bmv2/stages/ha.p4 b/dash-pipeline/bmv2/stages/ha.p4 index 15308c51d..dcfeb901b 100644 --- a/dash-pipeline/bmv2/stages/ha.p4 +++ b/dash-pipeline/bmv2/stages/ha.p4 @@ -92,7 +92,8 @@ control ha_stage(inout headers_t hdr, bit<16> dp_channel_min_src_port, bit<16> dp_channel_max_src_port, bit<32> dp_channel_probe_interval_ms, - bit<32> dp_channel_probe_fail_threshold + bit<32> dp_channel_probe_fail_threshold, + @SaiVal[isreadonly="true"] bit<1> dp_channel_is_alive ) { meta.ha.peer_ip_is_v6 = peer_ip_is_v6; meta.ha.peer_ip = peer_ip; diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 53f67fc3c..d2d351c77 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -14,6 +14,8 @@ 4. [4.4. Flow](#44-flow) 5. [4.5. ENI](#45-eni) 6. [4.6. Event notifications](#46-event-notifications) + 1. [4.6.1. HA set event notifications](#461-ha-set-event-notifications) + 2. [4.6.2. HA scope event notifications](#462-ha-scope-event-notifications) 7. [4.7. Counters](#47-counters) 1. [4.7.1. HA set stats](#471-ha-set-stats) 2. [4.7.2. ENI stats](#472-eni-stats) @@ -89,6 +91,7 @@ HA set is defined as a SAI object and contains the following SAI attributes: | SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MAX | `sai_uint16_t` | The maximum source port of the data plane channel. 
| | SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_INTERVAL_MS | `sai_uint32_t` | The interval of the data plane channel probe. | | SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_FAIL_THRESHOLD | `sai_uint32_t` | The threshold of the data plane channel probe fail. | +| SAI_HA_SET_ATTR_DP_CHANNEL_IS_ALIVE | `bool` | (Readonly) Is data plane channel alive. | ### 4.2. HA Scope @@ -177,13 +180,71 @@ To provide the ENI-level HA control, each ENI will have the following SAI attrib ### 4.6. Event notifications -To receive the HA state updates from the DASH implementation, the following SAI notification attributes are added on the switch object: +To receive the HA related updates from the DASH implementation, the following SAI notification attributes are added on the switch object: | Attribute name | Type | Description | | -------------- | ---- | ----------- | +| SAI_SWITCH_ATTR_HA_SET_EVENT_NOTIFY | `sai_ha_set_event_notification_fn` | The callback function for receiving events on the HA set. | | SAI_SWITCH_ATTR_HA_SCOPE_EVENT_NOTIFY | `sai_ha_scope_event_notification_fn` | The callback function for receiving events on the HA scope. | -And the callback function and HA state changed event is defined as below: +#### 4.6.1. HA set event notifications + +Whenever a HA set state is changed, it will be reported back via HA set event notification, such as data plane channel goes down. The detailed definition is shown as below: + +```c +/** + * @brief HA set event type + */ +typedef enum _sai_ha_set_event_t +{ + /** Any HA set state is changed, such as data plane channel goes down. 
*/ + SAI_HA_SET_STATE_CHANGED, + +} sai_ha_set_event_t; + +/** + * @brief Notification data format received from SAI HA set callback + * + * @count attr[attr_count] + */ +typedef struct _sai_ha_set_event_data_t +{ + /** Event type */ + sai_ha_set_event_t event_type; + + /** HA set id */ + sai_object_id_t ha_set_id; + + /** Attributes count */ + uint32_t attr_count; + + /** + * @brief Attributes + * + * @objects SAI_OBJECT_TYPE_HA_SET + */ + sai_attribute_t *attr; + +} sai_ha_set_event_data_t; + +/** + * @brief HA set event notification + * + * Passed as a parameter into sai_initialize_switch() + * + * @count data[count] + * + * @param[in] count Number of notifications + * @param[in] data Array of HA set events + */ +typedef void (*sai_ha_set_event_notification_fn)( + _In_ uint32_t count, + _In_ const sai_ha_set_event_data_t *ha_set_event_data); +``` + +#### 4.6.2. HA scope event notifications + +Similar to HA set, whenever any HA scope state is changed, it will be reported back via HA scope event notification. The detailed definition is shown as below: ```c /** @@ -191,7 +252,7 @@ And the callback function and HA state changed event is defined as below: */ typedef enum _sai_ha_scope_event_t { - /** Moved to a new HA state. */ + /** Any HA scope state is changed, such as HA state. */ SAI_HA_SCOPE_STATE_CHANGED, } sai_ha_scope_event_t; From e37901b3194bcf163b0ce969af4dfb3eff224b42 Mon Sep 17 00:00:00 2001 From: r12f Date: Fri, 15 Mar 2024 22:49:37 +0000 Subject: [PATCH 12/22] fix spelling. 
--- documentation/high-avail/ha-api-hld.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index d2d351c77..a27b9d207 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -91,7 +91,7 @@ HA set is defined as a SAI object and contains the following SAI attributes: | SAI_HA_SET_ATTR_DP_CHANNEL_SRC_PORT_MAX | `sai_uint16_t` | The maximum source port of the data plane channel. | | SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_INTERVAL_MS | `sai_uint32_t` | The interval of the data plane channel probe. | | SAI_HA_SET_ATTR_DP_CHANNEL_PROBE_FAIL_THRESHOLD | `sai_uint32_t` | The threshold of the data plane channel probe fail. | -| SAI_HA_SET_ATTR_DP_CHANNEL_IS_ALIVE | `bool` | (Readonly) Is data plane channel alive. | +| SAI_HA_SET_ATTR_DP_CHANNEL_IS_ALIVE | `bool` | (Read-only) Is data plane channel alive. | ### 4.2. HA Scope From 2aa0f434db30f5d57d536336adba02b250f24c38 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 21 Mar 2024 20:54:26 +0000 Subject: [PATCH 13/22] Adding capabilities. --- documentation/high-avail/ha-api-hld.md | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index a27b9d207..a37801bc2 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -23,6 +23,9 @@ 2. [4.7.2.2. ENI-level flow operation counters](#4722-eni-level-flow-operation-counters) 3. [4.7.2.3. ENI-level flow sync packet counters](#4723-eni-level-flow-sync-packet-counters) 4. [4.7.2.4. ENI-level flow sync operations counters](#4724-eni-level-flow-sync-operations-counters) + 8. [4.8. Capability](#48-capability) + 1. [4.8.1. Topology related capabilities](#481-topology-related-capabilities) + 2. [4.8.2. Stats related capabilities](#482-stats-related-capabilities) 5. [5. 
HA in DASH behavior model](#5-ha-in-dash-behavior-model) 1. [5.1. HA stage](#51-ha-stage) 2. [5.2. Packet type and flow operations](#52-packet-type-and-flow-operations) @@ -374,6 +377,37 @@ Here are the new stats added for monitoring flow sync operations on each ENI: | SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_ACK_FAILED | The number of inline/timed flow create/update/delete ack that the ENI is received but failed to process. | | SAI_ENI_STAT_(INLINE/TIMED)\_FLOW\_(CREATE/UPDATE/DELETE)_ACK_IGNORED | The number of inline/timed flow create/update/delete ack that the ENI is received but its flow operation is processed as ignored. | +### 4.8. Capability + +To check which type of topology is supported in the DASH implementation, the following read only attributes is added on the switch level for capability queries: + +#### 4.8.1. Topology related capabilities + +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_SWITCH_ATTR_DASH_CAPS_MAX_HA_SET_COUNT | `sai_uint32_t` | The max number of HA set can be created. | +| SAI_SWITCH_ATTR_DASH_CAPS_MAX_HA_SCOPE_COUNT_PER_HA_SET | `sai_uint32_t` | The max number of HA scope that can be created within a single HA set. | +| SAI_SWITCH_ATTR_DASH_CAPS_MAX_FLOW_TABLE_COUNT | `sai_uint32_t` | The max number of flow tables that can be created. | + +Here are some examples of how to use these capability to represent the topologies: + +| Topology | MAX_HA_SET_COUNT | MAX_HA_SCOPE_COUNT_PER_HA_SET | MAX_FLOW_TABLE_COUNT | +| -------- | ---------------- | ----------------------------- | -------------------- | +| ENI level HA with DPU level pairing | 1 | (max # of ENI supported) | 1 | +| DPU level HA | 1 | 1 | 1 | + +#### 4.8.2. Stats related capabilities + +Stats related capabilities can help with 2 things: + +1. Specify which stats is supported now, so we can start pulling to avoid errors. +2. Return internal debug stats to help with troubleshooting. 
+ +| Attribute name | Type | Description | +| -------------- | ---- | ----------- | +| SAI_SWITCH_ATTR_DASH_CAPS_SUPPORTED_HA_SET_STATS | `sai_s32_list_t` | The list of supported HA set stats. | +| SAI_SWITCH_ATTR_DASH_CAPS_SUPPORTED_ENI_STATS | `sai_s32_list_t` | The list of supported ENI stats. | + ## 5. HA in DASH behavior model With these concepts, we can now use them to build a pipeline with flow HA supported. From 84ee3c3cfb3ad34cc48b543bed1ee9c55ca9688b Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 21 Mar 2024 20:56:38 +0000 Subject: [PATCH 14/22] spellcheck. --- documentation/high-avail/ha-api-hld.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index a37801bc2..7f1d64f9d 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -75,7 +75,7 @@ Or, with even more complicated topology, such as each ENI owns 2 dedicated flow ![](./images/ha-components-complex.svg) -> Note: The graphs here is to show the flexibiilty of these concepts, but not require all the possible topologies to be supported by the DASH providers. +> Note: The graphs here is to show the flexibiilty of these concepts, but not require all the possible topology to be supported by the DASH providers. ## 4. SAI APIs @@ -389,7 +389,7 @@ To check which type of topology is supported in the DASH implementation, the fol | SAI_SWITCH_ATTR_DASH_CAPS_MAX_HA_SCOPE_COUNT_PER_HA_SET | `sai_uint32_t` | The max number of HA scope that can be created within a single HA set. | | SAI_SWITCH_ATTR_DASH_CAPS_MAX_FLOW_TABLE_COUNT | `sai_uint32_t` | The max number of flow tables that can be created. 
| -Here are some examples of how to use these capability to represent the topologies: +Here are some examples of how to use these capability to represent the topology: | Topology | MAX_HA_SET_COUNT | MAX_HA_SCOPE_COUNT_PER_HA_SET | MAX_FLOW_TABLE_COUNT | | -------- | ---------------- | ----------------------------- | -------------------- | From 8ac028739b606c81695567924d165f28c3cfca9b Mon Sep 17 00:00:00 2001 From: r12f Date: Tue, 26 Mar 2024 18:21:06 +0000 Subject: [PATCH 15/22] Update event notification. --- dash-pipeline/SAI/SAI | 2 +- documentation/high-avail/ha-api-hld.md | 23 ++++++----------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/dash-pipeline/SAI/SAI b/dash-pipeline/SAI/SAI index 6198db544..3fe490d83 160000 --- a/dash-pipeline/SAI/SAI +++ b/dash-pipeline/SAI/SAI @@ -1 +1 @@ -Subproject commit 6198db544f1d03163c8f8cac1fbf607ce81b4149 +Subproject commit 3fe490d836be6ed1d7c4a8763d5e33053f9e957a diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 7f1d64f9d..dcdd8473a 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -218,15 +218,8 @@ typedef struct _sai_ha_set_event_data_t /** HA set id */ sai_object_id_t ha_set_id; - /** Attributes count */ - uint32_t attr_count; - - /** - * @brief Attributes - * - * @objects SAI_OBJECT_TYPE_HA_SET - */ - sai_attribute_t *attr; + /** Is data plane channel alive from data plane channel probing */ + bool dp_channel_is_alive; } sai_ha_set_event_data_t; @@ -273,15 +266,11 @@ typedef struct _sai_ha_scope_event_data_t /** HA scope id */ sai_object_id_t ha_scope_id; - /** Attributes count */ - uint32_t attr_count; + /** HA role */ + sai_dash_ha_role_t ha_role; - /** - * @brief Attributes - * - * @objects SAI_OBJECT_TYPE_HA_SCOPE - */ - sai_attribute_t *attr; + /** Flow version */ + sai_uint32_t flow_version; } sai_ha_scope_event_data_t; From 4930e3abca45820b9638ca89af3b667be636c75f Mon Sep 17 00:00:00 2001 
From: r12f Date: Thu, 28 Mar 2024 18:44:37 +0000 Subject: [PATCH 16/22] Reuse the SAI object APIs for stats capabilities. --- documentation/high-avail/ha-api-hld.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index dcdd8473a..cd37074fe 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -392,10 +392,7 @@ Stats related capabilities can help with 2 things: 1. Specify which stats is supported now, so we can start pulling to avoid errors. 2. Return internal debug stats to help with troubleshooting. -| Attribute name | Type | Description | -| -------------- | ---- | ----------- | -| SAI_SWITCH_ATTR_DASH_CAPS_SUPPORTED_HA_SET_STATS | `sai_s32_list_t` | The list of supported HA set stats. | -| SAI_SWITCH_ATTR_DASH_CAPS_SUPPORTED_ENI_STATS | `sai_s32_list_t` | The list of supported ENI stats. | +To query the HA stats related capabilities, we will use the existing SAI stats capabilities. For more details on the API, please refer to the [SAI object APIs](https://github.com/opencomputeproject/SAI/blob/master/inc/saiobject.h). ## 5. HA in DASH behavior model From 5a03dbec499f7b9c2f0e854e564ad90d6e1a651e Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 1 Apr 2024 23:09:24 +0000 Subject: [PATCH 17/22] Added capabilities for HA owner, simplified capabilities for HA topology. --- documentation/high-avail/ha-api-hld.md | 41 ++++++++++++++++++++------ 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index cd37074fe..78eaea177 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -2,7 +2,10 @@ | Rev | Date | Author | Change Description | | --- | ---- | ------ | ------------------ | -| 0.1 | 03/02/2024 | Riff Jiang | Initial version | +| 0.1 | 03/02/2024 | Riff Jiang | Initial version. 
| +| 0.2 | 03/15/2024 | Riff Jiang | Added HA set notification. | +| 0.3 | 03/21/2024 | Riff Jiang | Added capabilities for HA topology and stats. | +| 0.4 | 04/01/2024 | Riff Jiang | Added capabilities for HA owner, simplified capabilities for HA topology. | 1. [1. Terminology](#1-terminology) 2. [2. Background](#2-background) @@ -372,18 +375,38 @@ To check which type of topology is supported in the DASH implementation, the fol #### 4.8.1. Topology related capabilities +To describe how the topology looks like, we need to use at least 4 different attributes: + +- \# of HA set that can be created +- \# of HA scope per HA set that can be created +- \# of ENI per HA scope that can be created +- \# of flow table that can be created + +However, since we only support DPU level pairing with a single flow table and the topologies we supported at this moment is limited, the topology related capability is added as high level modes, instead of individual attributes: + | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_SWITCH_ATTR_DASH_CAPS_MAX_HA_SET_COUNT | `sai_uint32_t` | The max number of HA set can be created. | -| SAI_SWITCH_ATTR_DASH_CAPS_MAX_HA_SCOPE_COUNT_PER_HA_SET | `sai_uint32_t` | The max number of HA scope that can be created within a single HA set. | -| SAI_SWITCH_ATTR_DASH_CAPS_MAX_FLOW_TABLE_COUNT | `sai_uint32_t` | The max number of flow tables that can be created. | +| SAI_SWITCH_ATTR_DASH_CAPS_HA_CONTROLLER_NEEDED | `bool` | If `true`, the DASH host will own driving the HA state machine. | +| SAI_SWITCH_ATTR_DASH_CAPS_HA_SCOPE | `sai_dash_caps_ha_scope_t` | HA scope that supported by the DASH implementation. 
| + +The HA scope capability enum is defined as below: + +```c++ +typedef enum _sai_dash_caps_ha_scope_t +{ + DPU, + ENI, +} sai_dash_caps_ha_scope_t; +``` -Here are some examples of how to use these capability to represent the topology: +And there is how the topology looks like for each mode: -| Topology | MAX_HA_SET_COUNT | MAX_HA_SCOPE_COUNT_PER_HA_SET | MAX_FLOW_TABLE_COUNT | -| -------- | ---------------- | ----------------------------- | -------------------- | -| ENI level HA with DPU level pairing | 1 | (max # of ENI supported) | 1 | -| DPU level HA | 1 | 1 | 1 | +| HA scope | DPU | ENI | +| --- | --- | --- | +| \# of HA set | 1 | 1 | +| \# of HA scope per HA set | 1 | (Max \# of ENI) | +| \# of ENI per HA scope | (Max \# of ENI) | 1 | +| \# of flow table | 1 | 1 | #### 4.8.2. Stats related capabilities From ef90c474ae8a2f3d25d1e762cb808aae539fc4eb Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 1 Apr 2024 23:12:24 +0000 Subject: [PATCH 18/22] minor update. --- documentation/high-avail/ha-api-hld.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 78eaea177..0c0e7ee82 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -386,7 +386,7 @@ However, since we only support DPU level pairing with a single flow table and th | Attribute name | Type | Description | | -------------- | ---- | ----------- | -| SAI_SWITCH_ATTR_DASH_CAPS_HA_CONTROLLER_NEEDED | `bool` | If `true`, the DASH host will own driving the HA state machine. | +| SAI_SWITCH_ATTR_DASH_CAPS_HA_OWNER_NEEDED | `bool` | If `true`, the DASH host will own driving the HA state machine. | | SAI_SWITCH_ATTR_DASH_CAPS_HA_SCOPE | `sai_dash_caps_ha_scope_t` | HA scope that supported by the DASH implementation. 
| The HA scope capability enum is defined as below: From cb5e8cae0f6b27868f2ea3ea2f19a60d1c880fea Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 1 Apr 2024 23:13:09 +0000 Subject: [PATCH 19/22] minor update. --- documentation/high-avail/ha-api-hld.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 0c0e7ee82..3c7d634e6 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -399,7 +399,7 @@ typedef enum _sai_dash_caps_ha_scope_t } sai_dash_caps_ha_scope_t; ``` -And there is how the topology looks like for each mode: +And here is how the topology looks like for type of HA scope: | HA scope | DPU | ENI | | --- | --- | --- | From 9b7176379d6401e3ecd0b5dc9fa4f5cdb63e6c41 Mon Sep 17 00:00:00 2001 From: r12f Date: Mon, 1 Apr 2024 23:18:41 +0000 Subject: [PATCH 20/22] spellcheck. --- documentation/high-avail/ha-api-hld.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 3c7d634e6..f660d93fc 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -382,7 +382,7 @@ To describe how the topology looks like, we need to use at least 4 different att - \# of ENI per HA scope that can be created - \# of flow table that can be created -However, since we only support DPU level pairing with a single flow table and the topologies we supported at this moment is limited, the topology related capability is added as high level modes, instead of individual attributes: +However, since we only support DPU level pairing with a single flow table and the topology we support at this moment is limited, the topology related capability is added as high level modes, instead of individual attributes: | Attribute name | Type | Description | | -------------- | ---- | ----------- | From 
571c8ae8158606b92129fea184ed8bc2c172acbc Mon Sep 17 00:00:00 2001 From: r12f Date: Tue, 2 Apr 2024 17:49:20 +0000 Subject: [PATCH 21/22] minor rename. --- documentation/high-avail/ha-api-hld.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index f660d93fc..9df735163 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -387,16 +387,16 @@ However, since we only support DPU level pairing with a single flow table and th | Attribute name | Type | Description | | -------------- | ---- | ----------- | | SAI_SWITCH_ATTR_DASH_CAPS_HA_OWNER_NEEDED | `bool` | If `true`, the DASH host will own driving the HA state machine. | -| SAI_SWITCH_ATTR_DASH_CAPS_HA_SCOPE | `sai_dash_caps_ha_scope_t` | HA scope that supported by the DASH implementation. | +| SAI_SWITCH_ATTR_DASH_CAPS_HA_SCOPE_LEVEL | `sai_dash_caps_ha_scope_level_t` | HA scope level that is supported by the DASH implementation. | The HA scope capability enum is defined as below: ```c++ -typedef enum _sai_dash_caps_ha_scope_t +typedef enum _sai_dash_caps_ha_scope_level_t { - DPU, + Card, ENI, -} sai_dash_caps_ha_scope_t; +} sai_dash_caps_ha_scope_level_t; ``` And here is how the topology looks like for type of HA scope: From abe48f1c5d2ee1d386458072e983bc4e2cc982e5 Mon Sep 17 00:00:00 2001 From: r12f Date: Thu, 4 Apr 2024 19:06:54 +0000 Subject: [PATCH 22/22] Addressing comments. 
--- documentation/high-avail/ha-api-hld.md | 27 +++++--------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/documentation/high-avail/ha-api-hld.md b/documentation/high-avail/ha-api-hld.md index 9df735163..043fff32b 100644 --- a/documentation/high-avail/ha-api-hld.md +++ b/documentation/high-avail/ha-api-hld.md @@ -203,15 +203,16 @@ Whenever a HA set state is changed, it will be reported back via HA set event no */ typedef enum _sai_ha_set_event_t { - /** Any HA set state is changed, such as data plane channel goes down. */ - SAI_HA_SET_STATE_CHANGED, + /** Data plane channel goes up. */ + SAI_HA_SET_DP_CHANNEL_UP, + + /** Data plane channel goes down. */ + SAI_HA_SET_DP_CHANNEL_DOWN, } sai_ha_set_event_t; /** * @brief Notification data format received from SAI HA set callback - * - * @count attr[attr_count] */ typedef struct _sai_ha_set_event_data_t { @@ -221,9 +222,6 @@ typedef struct _sai_ha_set_event_data_t /** HA set id */ sai_object_id_t ha_set_id; - /** Is data plane channel alive from data plane channel probing */ - bool dp_channel_is_alive; - } sai_ha_set_event_data_t; /** @@ -246,26 +244,11 @@ typedef void (*sai_ha_set_event_notification_fn)( Similar to HA set, whenever any HA scope state is changed, it will be reported back via HA scope event notification. The detailed definition is shown as below: ```c -/** - * @brief HA scope event type - */ -typedef enum _sai_ha_scope_event_t -{ - /** Any HA scope state is changed, such as HA state. */ - SAI_HA_SCOPE_STATE_CHANGED, - -} sai_ha_scope_event_t; - /** * @brief Notification data format received from SAI HA scope callback - * - * @count attr[attr_count] */ typedef struct _sai_ha_scope_event_data_t { - /** Event type */ - sai_ha_scope_event_t event_type; - /** HA scope id */ sai_object_id_t ha_scope_id;